From 8e36a07beaa2567d4a07bf4100a38c5ad405dcdf Mon Sep 17 00:00:00 2001 From: hanna Date: Wed, 10 Nov 2010 17:59:50 +0000 Subject: [PATCH] Convert GenomeLocParser into an instance variable. This change is required for anything that needs to be simultaneously aware of multiple references, eg Queue's interval sharding code, liftover support, distributed GATK etc. GenomeLocParser instances must now be used to create/parse GenomeLocs. GenomeLocParser instances are available in walkers by calling either -getToolkit().getGenomeLocParser() or -refContext.getGenomeLocParser() This is an intermediate change; GenomeLocParser will eventually be merged with the reference, but we're not clear exactly how to do that yet. This will become clearer when contig aliasing is implemented. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4642 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/AbstractGenomeAnalysisEngine.java | 94 +++--- .../sting/gatk/GenomeAnalysisEngine.java | 16 +- .../sting/gatk/contexts/ReferenceContext.java | 22 +- .../variantcontext/VariantContextUtils.java | 46 +-- .../variantcontext/VariantJEXLContext.java | 23 +- .../datasources/providers/AllLocusView.java | 3 +- .../providers/LocusReferenceView.java | 36 +-- .../providers/LocusShardDataProvider.java | 11 +- .../gatk/datasources/providers/LocusView.java | 7 + .../ReadBasedReferenceOrderedView.java | 16 +- .../providers/ReadReferenceView.java | 4 +- .../providers/ReadShardDataProvider.java | 5 +- .../datasources/providers/ReferenceView.java | 8 +- .../datasources/providers/RodLocusView.java | 8 +- .../providers/ShardDataProvider.java | 21 +- .../datasources/shards/IntervalSharder.java | 16 +- .../shards/LocusShardStrategy.java | 16 +- .../datasources/shards/ReadShardStrategy.java | 2 +- .../shards/ShardStrategyFactory.java | 17 +- .../ReferenceOrderedDataSource.java | 23 +- .../simpleDataSources/ResourcePool.java | 45 ++- .../simpleDataSources/SAMDataSource.java | 19 +- 
.../gatk/executive/LinearMicroScheduler.java | 6 +- .../sting/gatk/executive/MicroScheduler.java | 6 + .../sting/gatk/executive/ShardTraverser.java | 4 +- .../sting/gatk/executive/WindowMaker.java | 5 +- .../gatk/iterators/GenomeLocusIterator.java | 12 +- .../gatk/iterators/LocusIteratorByState.java | 20 +- .../gatk/iterators/VerifyingSamIterator.java | 20 +- .../gatk/refdata/ReadMetaDataTracker.java | 38 ++- .../ReferenceDependentFeatureCodec.java} | 21 +- .../gatk/refdata/SeekableRODIterator.java | 62 ++-- .../gatk/refdata/VariantContextAdaptors.java | 2 +- .../annotator/AnnotatorInputTableCodec.java | 25 +- .../refdata/features/beagle/BeagleCodec.java | 30 +- .../refdata/features/refseq/RefSeqCodec.java | 30 +- .../features/refseq/RefSeqFeature.java | 19 +- .../refdata/features/table/TableCodec.java | 20 +- .../sting/gatk/refdata/tracks/RMDTrack.java | 19 +- .../tracks/builders/RMDTrackBuilder.java | 27 +- .../utils/FeatureToGATKFeatureIterator.java | 7 +- .../sting/gatk/refdata/utils/GATKFeature.java | 6 +- .../StringToGenomeLocIteratorAdapter.java | 13 +- .../gatk/traversals/TraversalEngine.java | 4 +- .../gatk/traversals/TraverseDuplicates.java | 10 +- .../sting/gatk/traversals/TraverseLoci.java | 2 +- .../sting/gatk/traversals/TraverseReads.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 2 +- .../beagle/ProduceBeagleInputWalker.java | 4 +- .../walkers/coverage/CallableLociWalker.java | 8 +- .../coverage/CompareCallableLociWalker.java | 4 +- .../coverage/DepthOfCoverageWalker.java | 5 +- .../walkers/fasta/FastaReferenceWalker.java | 2 +- .../gatk/walkers/filters/ClusteredSnps.java | 9 +- .../filters/VariantFiltrationWalker.java | 6 +- .../walkers/genotyper/BatchedCallsMerger.java | 2 +- .../SimpleIndelCalculationModel.java | 7 +- .../indels/IndelGenotyperV2Walker.java | 38 +-- .../gatk/walkers/indels/IndelRealigner.java | 26 +- .../indels/RealignerTargetCreator.java | 22 +- ...dSegregatingAlternateAllelesVCFWriter.java | 32 +- 
...ergeSegregatingAlternateAllelesWalker.java | 2 +- .../phasing/ReadBackedPhasingWalker.java | 42 +-- .../sting/gatk/walkers/qc/CountRodWalker.java | 13 +- .../walkers/qc/ValidatingPileupWalker.java | 4 +- .../walkers/sequenom/PickSequenomProbes.java | 10 +- .../varianteval/VariantEvalWalker.java | 2 +- .../GenerateVariantClustersWalker.java | 2 +- .../VariantGaussianMixtureModel.java | 9 +- .../VariantRecalibrator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../walkers/variantutils/SelectVariants.java | 2 +- .../walkers/CreateTiTvTrack.java | 2 +- .../oneoffprojects/walkers/DSBWalkerV3.java | 9 +- .../walkers/DesignFileGeneratorWalker.java | 4 +- .../walkers/IndelAnnotator.java | 8 +- .../walkers/IndelDBRateWalker.java | 29 +- .../walkers/IndelErrorRateWalker.java | 2 +- .../oneoffprojects/walkers/MarkIntervals.java | 2 +- .../walkers/MendelianViolationClassifier.java | 4 +- .../walkers/ReadQualityScoreWalker.java | 11 +- .../walkers/RealignedReadCounter.java | 6 +- .../walkers/TestReadFishingWalker.java | 2 +- .../walkers/ValidateRODForReads.java | 6 +- .../duplicates/CombineDuplicatesWalker.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 4 +- .../playground/tools/RemapAlignments.java | 6 +- .../sting/playground/utils/GenomicMap.java | 26 +- .../broadinstitute/sting/utils/GenomeLoc.java | 137 ++------- .../sting/utils/GenomeLocParser.java | 281 ++++-------------- .../sting/utils/GenomeLocSortedSet.java | 46 +-- .../sting/utils/bed/BedParser.java | 11 +- .../sting/utils/duplicates/DupUtils.java | 8 +- .../sting/utils/genotype/Haplotype.java | 7 +- .../interval/IntervalFileMergingIterator.java | 7 +- .../sting/utils/interval/IntervalUtils.java | 12 +- .../pileup/ExtendedEventPileupElement.java | 4 - .../utils/sam/ArtificialSAMFileReader.java | 19 +- .../sting/utils/sam/ComparableSAMRecord.java | 8 +- .../org/broadinstitute/sting/WalkerTest.java 
| 3 - .../VariantJEXLContextUnitTest.java | 8 +- .../providers/AllLocusViewUnitTest.java | 8 +- .../providers/CoveredLocusViewUnitTest.java | 10 +- .../providers/LocusReferenceViewUnitTest.java | 24 +- .../providers/LocusViewTemplate.java | 81 +++-- ...ReadBasedReferenceOrderedViewUnitTest.java | 18 +- .../providers/ReadReferenceViewUnitTest.java | 4 +- .../ReferenceOrderedViewUnitTest.java | 31 +- .../providers/ReferenceViewTemplate.java | 23 +- .../providers/ShardDataProviderUnitTest.java | 2 +- .../datasources/shards/MockLocusShard.java | 5 +- .../ReferenceOrderedDataPoolUnitTest.java | 22 +- .../SAMBAMDataSourceUnitTest.java | 15 +- .../ReadGroupBlackListFilterUnitTest.java | 2 - .../BoundedReadIteratorUnitTest.java | 4 - .../LocusIteratorByStateUnitTest.java | 8 +- .../refdata/ReadMetaDataTrackerUnitTest.java | 36 +-- .../builders/RMDTrackBuilderUnitTest.java | 5 +- .../utils/FlashBackIteratorUnitTest.java | 49 +-- .../TraverseDuplicatesUnitTest.java | 21 +- .../traversals/TraverseReadsUnitTest.java | 20 +- .../sting/utils/GenomeLocParserUnitTest.java | 92 +++--- .../utils/GenomeLocSortedSetUnitTest.java | 65 ++-- .../sting/utils/GenomeLocUnitTest.java | 24 +- .../sting/utils/bed/BedParserUnitTest.java | 9 +- .../utils/genotype/glf/GLFWriterUnitTest.java | 14 +- .../utils/genotype/vcf/VCFWriterUnitTest.java | 7 +- .../IntervalFileMergingIteratorUnitTest.java | 30 +- .../utils/interval/IntervalUtilsTest.java | 17 +- .../NwayIntervalMergingIteratorUnitTest.java | 39 +-- .../gatk/IntervalScatterFunction.scala | 4 +- .../sting/queue/util/PipelineUtils.scala | 4 +- .../IntervalScatterFunctionUnitTest.scala | 126 ++++---- 135 files changed, 1305 insertions(+), 1317 deletions(-) rename java/{test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java => src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java} (58%) diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java 
b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index f56e14389..0c7e29304 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -48,7 +48,6 @@ import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; @@ -69,6 +68,11 @@ public abstract class AbstractGenomeAnalysisEngine { */ private ParsingEngine parsingEngine; + /** + * The genomeLocParser can create and parse GenomeLocs. + */ + private GenomeLocParser genomeLocParser; + /** * Accessor for sharded read data. */ @@ -82,6 +86,10 @@ public abstract class AbstractGenomeAnalysisEngine { return referenceDataSource; } + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * Accessor for sharded reference data. */ @@ -136,6 +144,14 @@ public abstract class AbstractGenomeAnalysisEngine { this.parsingEngine = parsingEngine; } + /** + * Explicitly set the GenomeLocParser, for unit testing. + * @param genomeLocParser GenomeLocParser to use. + */ + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + /** * Actually run the engine. * @return the value of this traversal. 
@@ -188,7 +204,7 @@ public abstract class AbstractGenomeAnalysisEngine { GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : loadIntervals(argCollection.intervals, argCollection.intervalMerging, - GenomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging))); + genomeLocParser.mergeIntervalLocations(checkRODToIntervalArgument(),argCollection.intervalMerging))); // if no exclude arguments, can return parseIntervalArguments directly if (argCollection.excludeIntervals == null) @@ -221,11 +237,11 @@ public abstract class AbstractGenomeAnalysisEngine { IntervalMergingRule mergingRule, List additionalIntervals) { - return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals, - IntervalUtils.parseIntervalArguments(argList, - this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), - argCollection.BTIMergeRule), - mergingRule); + return IntervalUtils.sortAndMergeIntervals(genomeLocParser,IntervalUtils.mergeListsBySetOperator(additionalIntervals, + IntervalUtils.parseIntervalArguments(genomeLocParser,argList, + this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), + argCollection.BTIMergeRule), + mergingRule); } /** @@ -298,22 +314,22 @@ public abstract class AbstractGenomeAnalysisEngine { protected void initializeDataSources() { logger.info("Strictness is " + argCollection.strictnessLevel); + validateSuppliedReference(); + referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); + validateSuppliedReads(); - readsDataSource = createReadsDataSource(); + readsDataSource = createReadsDataSource(genomeLocParser); for (SamRecordFilter filter : filters) if (filter instanceof SamRecordHeaderFilter) ((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader()); - validateSuppliedReference(); - referenceDataSource = 
openReferenceSequenceFile(argCollection.referenceFile); - sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles); RMDTrackBuilder manager = new RMDTrackBuilder(); // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference - manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary()); + manager.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser); List tracks = manager.getReferenceMetaDataSources(this,argCollection); validateSuppliedReferenceOrderedData(tracks); @@ -330,7 +346,7 @@ public abstract class AbstractGenomeAnalysisEngine { * @return A unique identifier for the source file of this read. Exception if not found. */ public SAMReaderID getReaderIDForRead(final SAMRecord read) { - return getDataSource().getReaderID(read); + return getReadsDataSource().getReaderID(read); } /** @@ -339,7 +355,7 @@ public abstract class AbstractGenomeAnalysisEngine { * @return The source filename for this read. 
*/ public File getSourceFileForReaderID(final SAMReaderID id) { - return getDataSource().getSAMFile(id); + return getReadsDataSource().getSAMFile(id); } /** @@ -351,7 +367,7 @@ public abstract class AbstractGenomeAnalysisEngine { * @return Sets of samples in the merged input SAM stream, grouped by readers */ public List> getSamplesByReaders() { - List readers = getDataSource().getReaderIDs(); + List readers = getReadsDataSource().getReaderIDs(); List> sample_sets = new ArrayList>(readers.size()); @@ -360,7 +376,7 @@ public abstract class AbstractGenomeAnalysisEngine { Set samples = new HashSet(1); sample_sets.add(samples); - for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) { + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { samples.add(g.getSample()); } } @@ -380,7 +396,7 @@ public abstract class AbstractGenomeAnalysisEngine { public List> getLibrariesByReaders() { - List readers = getDataSource().getReaderIDs(); + List readers = getReadsDataSource().getReaderIDs(); List> lib_sets = new ArrayList>(readers.size()); @@ -389,7 +405,7 @@ public abstract class AbstractGenomeAnalysisEngine { Set libs = new HashSet(2); lib_sets.add(libs); - for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) { + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { libs.add(g.getLibrary()); } } @@ -406,22 +422,22 @@ public abstract class AbstractGenomeAnalysisEngine { public Map> getFileToReadGroupIdMapping() { // populate the file -> read group mapping Map> fileToReadGroupIdMap = new HashMap>(); - for (SAMReaderID id: getDataSource().getReaderIDs()) { + for (SAMReaderID id: getReadsDataSource().getReaderIDs()) { Set readGroups = new HashSet(5); - for (SAMReadGroupRecord g : getDataSource().getHeader(id).getReadGroups()) { - if (getDataSource().hasReadGroupCollisions()) { + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(id).getReadGroups()) { + if 
(getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes. // If there were, use the SamFileHeaderMerger to translate from the // original read group id to the read group id in the merged stream - readGroups.add(getDataSource().getReadGroupId(id,g.getReadGroupId())); + readGroups.add(getReadsDataSource().getReadGroupId(id,g.getReadGroupId())); } else { // otherwise, pass through the unmapped read groups since this is what Picard does as well readGroups.add(g.getReadGroupId()); } } - fileToReadGroupIdMap.put(getDataSource().getSAMFile(id),readGroups); + fileToReadGroupIdMap.put(getReadsDataSource().getSAMFile(id),readGroups); } return fileToReadGroupIdMap; @@ -440,7 +456,7 @@ public abstract class AbstractGenomeAnalysisEngine { public List> getMergedReadGroupsByReaders() { - List readers = getDataSource().getReaderIDs(); + List readers = getReadsDataSource().getReaderIDs(); List> rg_sets = new ArrayList>(readers.size()); @@ -449,11 +465,11 @@ public abstract class AbstractGenomeAnalysisEngine { Set groups = new HashSet(5); rg_sets.add(groups); - for (SAMReadGroupRecord g : getDataSource().getHeader(r).getReadGroups()) { - if (getDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so: + for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { + if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so: // use HeaderMerger to translate original read group id from the reader into the read group id in the // merged stream, and save that remapped read group id to associate it with specific reader - groups.add(getDataSource().getReadGroupId(r, g.getReadGroupId())); + groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId())); } else { // otherwise, pass through the unmapped read groups since this is what Picard does as well groups.add(g.getReadGroupId()); @@ -533,29 
+549,17 @@ public abstract class AbstractGenomeAnalysisEngine { } - - /** - * Convenience function that binds RODs using the old-style command line parser to the new style list for - * a uniform processing. - * - * @param name the name of the rod - * @param type its type - * @param file the file to load the rod from - */ - private void bindConvenienceRods(final String name, final String type, final String file) { - argCollection.RODBindings.add(Utils.join(",", new String[]{name, type, file})); - } - /** * Gets a data source for the given set of reads. * * @return A data source for the given set of reads. */ - private SAMDataSource createReadsDataSource() { + private SAMDataSource createReadsDataSource(GenomeLocParser genomeLocParser) { DownsamplingMethod method = getDownsamplingMethod(); return new SAMDataSource( unpackBAMFileList(argCollection.samFiles), + genomeLocParser, argCollection.useOriginalBaseQualities, argCollection.strictnessLevel, argCollection.readBufferSize, @@ -574,7 +578,7 @@ public abstract class AbstractGenomeAnalysisEngine { */ private ReferenceDataSource openReferenceSequenceFile(File refFile) { ReferenceDataSource ref = new ReferenceDataSource(refFile); - GenomeLocParser.setupRefContigOrdering(ref.getReference()); + genomeLocParser = new GenomeLocParser(ref.getReference()); return ref; } @@ -587,7 +591,7 @@ public abstract class AbstractGenomeAnalysisEngine { private List getReferenceOrderedDataSources(List rods) { List dataSources = new ArrayList(); for (RMDTrack rod : rods) - dataSources.add(new ReferenceOrderedDataSource(rod, flashbackData())); + dataSources.add(new ReferenceOrderedDataSource(referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,rod,flashbackData())); return dataSources; } @@ -614,10 +618,12 @@ public abstract class AbstractGenomeAnalysisEngine { * * @return the reads data source */ - public SAMDataSource getDataSource() { + public SAMDataSource getReadsDataSource() { return this.readsDataSource; } + + 
/** * Sets the collection of GATK main application arguments. * diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 76ec2ed35..07eed89c7 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -165,7 +165,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { throw new UserException.CommandLineException("Read-based traversals require a reference file but none was given"); } - return MicroScheduler.create(this,my_walker,this.getDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads); + return MicroScheduler.create(this,my_walker,this.getReadsDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads); } @Override @@ -258,7 +258,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { */ protected ShardStrategy getShardStrategy(ReferenceSequenceFile drivingDataSource) { GenomeLocSortedSet intervals = this.getIntervals(); - SAMDataSource readsDataSource = this.getDataSource(); + SAMDataSource readsDataSource = this.getReadsDataSource(); ValidationExclusion exclusions = (readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null); ReferenceDataSource referenceDataSource = this.getReferenceDataSource(); // Use monolithic sharding if no index is present. 
Monolithic sharding is always required for the original @@ -286,7 +286,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { else { region = new ArrayList(); for(SAMSequenceRecord sequenceRecord: drivingDataSource.getSequenceDictionary().getSequences()) - region.add(GenomeLocParser.createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); + region.add(getGenomeLocParser().createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength())); } return new MonolithicShardStrategy(readsDataSource,shardType,region); @@ -309,13 +309,14 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), SHARD_SIZE, + getGenomeLocParser(), intervals); } else shardStrategy = ShardStrategyFactory.shatter(readsDataSource, referenceDataSource.getReference(), ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, drivingDataSource.getSequenceDictionary(), - SHARD_SIZE); + SHARD_SIZE,getGenomeLocParser()); } else if (walker instanceof ReadWalker || walker instanceof DuplicateWalker) { shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL; @@ -326,13 +327,15 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { shardType, drivingDataSource.getSequenceDictionary(), SHARD_SIZE, + getGenomeLocParser(), intervals); } else { shardStrategy = ShardStrategyFactory.shatter(readsDataSource, referenceDataSource.getReference(), shardType, drivingDataSource.getSequenceDictionary(), - SHARD_SIZE); + SHARD_SIZE, + getGenomeLocParser()); } } else if (walker instanceof ReadPairWalker) { if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname) @@ -344,7 +347,8 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { referenceDataSource.getReference(), ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, 
drivingDataSource.getSequenceDictionary(), - SHARD_SIZE); + SHARD_SIZE, + getGenomeLocParser()); } else throw new ReviewedStingException("Unable to support walker of type" + walker.getClass().getName()); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index 96d49fc2e..830869fdb 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.contexts; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.BaseUtils; @@ -41,6 +42,11 @@ import net.sf.samtools.util.StringUtil; public class ReferenceContext { final public static boolean UPPERCASE_REFERENCE = true; + /** + * Facilitates creation of new GenomeLocs. + */ + private GenomeLocParser genomeLocParser; + /** * The locus. */ @@ -101,18 +107,18 @@ public class ReferenceContext { * @param locus locus of interest. * @param base reference base at that locus. 
*/ - public ReferenceContext( GenomeLoc locus, byte base ) { - this( locus, locus, new ForwardingProvider(base) ); + public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, byte base ) { + this( genomeLocParser, locus, locus, new ForwardingProvider(base) ); } - public ReferenceContext( GenomeLoc locus, GenomeLoc window, byte[] bases ) { - this( locus, window, new ForwardingProvider(bases) ); + public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, byte[] bases ) { + this( genomeLocParser, locus, window, new ForwardingProvider(bases) ); } - public ReferenceContext( GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) { + public ReferenceContext( GenomeLocParser genomeLocParser, GenomeLoc locus, GenomeLoc window, ReferenceContextRefProvider basesProvider ) { // if( !window.containsP(locus) ) // throw new StingException("Invalid locus or window; window does not contain locus"); - + this.genomeLocParser = genomeLocParser; this.locus = locus; this.window = window; this.basesProvider = basesProvider; @@ -125,6 +131,10 @@ public class ReferenceContext { } } + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * The locus currently being examined. * @return The current locus. 
diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index e5e029d74..c54f10aaa 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -219,8 +219,8 @@ public class VariantContextUtils { * @param exp expression * @return true if there is a match */ - public static boolean match(VariantContext vc, JexlVCMatchExp exp) { - return match(vc,Arrays.asList(exp)).get(exp); + public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, JexlVCMatchExp exp) { + return match(genomeLocParser,vc,Arrays.asList(exp)).get(exp); } /** @@ -233,8 +233,8 @@ public class VariantContextUtils { * @param exps expressions * @return true if there is a match */ - public static Map match(VariantContext vc, Collection exps) { - return new JEXLMap(exps,vc); + public static Map match(GenomeLocParser genomeLocParser,VariantContext vc, Collection exps) { + return new JEXLMap(genomeLocParser,exps,vc); } @@ -245,8 +245,8 @@ public class VariantContextUtils { * @param exp expression * @return true if there is a match */ - public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) { - return match(vc,g,Arrays.asList(exp)).get(exp); + public static boolean match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, JexlVCMatchExp exp) { + return match(genomeLocParser,vc,g,Arrays.asList(exp)).get(exp); } /** @@ -260,8 +260,8 @@ public class VariantContextUtils { * @param exps expressions * @return true if there is a match */ - public static Map match(VariantContext vc, Genotype g, Collection exps) { - return new JEXLMap(exps,vc,g); + public static Map match(GenomeLocParser genomeLocParser,VariantContext vc, Genotype g, Collection exps) { + return new 
JEXLMap(genomeLocParser,exps,vc,g); } @@ -306,8 +306,8 @@ public class VariantContextUtils { UNION, INTERSECT } - public static VariantContext simpleMerge(Collection unsortedVCs, byte refBase) { - return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, byte refBase) { + return simpleMerge(genomeLocParser, unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false, refBase); } @@ -322,14 +322,14 @@ public class VariantContextUtils { * @param genotypeMergeOptions * @return */ - public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - return simpleMerge(unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false); + return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, variantMergeOptions, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false); } - public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, + public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, boolean filteredAreUncalled ) { @@ -357,7 +357,7 @@ public class VariantContextUtils { // establish the baseline info from the first VC VariantContext first = VCs.get(0); String name = first.getSource(); - GenomeLoc loc = getLocation(first); + GenomeLoc loc = 
getLocation(genomeLocParser,first); Set alleles = new TreeSet(); Map genotypes = new TreeMap(); @@ -380,8 +380,8 @@ public class VariantContextUtils { if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) ) throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); - if ( getLocation(vc).size() > loc.size() ) - loc = getLocation(vc); // get the longest location + if ( getLocation(genomeLocParser,vc).size() > loc.size() ) + loc = getLocation(genomeLocParser,vc); // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; nVariant += vc.isVariant() ? 1 : 0; @@ -753,13 +753,13 @@ public class VariantContextUtils { * @param vc the variant context * @return the genomeLoc */ - public static final GenomeLoc getLocation(VariantContext vc) { - return GenomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd()); + public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) { + return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd()); } // NOTE: returns null if vc1 and vc2 are not mergeable into a single MNP record - public static VariantContext mergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { - if (!mergeIntoMNPvalidationCheck(vc1, vc2)) + public static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { + if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2)) return null; // Check that it's logically possible to merge the VCs, and that there's a point in doing so (e.g., annotations could be changed): @@ -974,9 +974,9 @@ public class VariantContextUtils { } } - private static boolean mergeIntoMNPvalidationCheck(VariantContext vc1, VariantContext vc2) { - GenomeLoc loc1 = VariantContextUtils.getLocation(vc1); - 
GenomeLoc loc2 = VariantContextUtils.getLocation(vc2); + private static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser,VariantContext vc1, VariantContext vc2) { + GenomeLoc loc1 = VariantContextUtils.getLocation(genomeLocParser,vc1); + GenomeLoc loc2 = VariantContextUtils.getLocation(genomeLocParser,vc2); if (!loc1.onSameContig(loc2)) throw new ReviewedStingException("Can only merge vc1, vc2 if on the same chromosome"); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java index ee9ec24ec..f36cf3bf6 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java @@ -27,6 +27,7 @@ import org.apache.commons.jexl2.JexlContext; import org.apache.commons.jexl2.MapContext; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -49,6 +50,7 @@ import java.util.*; */ class VariantJEXLContext implements JexlContext { + private GenomeLocParser genomeLocParser; // our stored variant context private VariantContext vc; @@ -73,7 +75,8 @@ class VariantJEXLContext implements JexlContext { x.put("homVarCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomVarCount(); }}); } - public VariantJEXLContext(VariantContext vc) { + public VariantJEXLContext(GenomeLocParser genomeLocParser,VariantContext vc) { + this.genomeLocParser = genomeLocParser; this.vc = vc; } @@ -119,6 +122,7 @@ class VariantJEXLContext implements JexlContext { */ class JEXLMap implements Map { + private final GenomeLocParser 
genomeLocParser; // our variant context and/or Genotype private final VariantContext vc; private final Genotype g; @@ -130,18 +134,19 @@ class JEXLMap implements Map { private Map jexl; - public JEXLMap(Collection jexlCollection, VariantContext vc, Genotype g) { + public JEXLMap(GenomeLocParser genomeLocParser,Collection jexlCollection, VariantContext vc, Genotype g) { + this.genomeLocParser = genomeLocParser; this.vc = vc; this.g = g; initialize(jexlCollection); } - public JEXLMap(Collection jexlCollection, VariantContext vc) { - this(jexlCollection, vc, null); + public JEXLMap(GenomeLocParser genomeLocParser,Collection jexlCollection, VariantContext vc) { + this(genomeLocParser,jexlCollection, vc, null); } - public JEXLMap(Collection jexlCollection, Genotype g) { - this(jexlCollection, null, g); + public JEXLMap(GenomeLocParser genomeLocParser,Collection jexlCollection, Genotype g) { + this(genomeLocParser,jexlCollection, null, g); } private void initialize(Collection jexlCollection) { @@ -159,14 +164,14 @@ class JEXLMap implements Map { private void createContext() { if ( g == null ) { // todo -- remove dependancy on g to the entire system - jContext = new VariantJEXLContext(vc); + jContext = new VariantJEXLContext(genomeLocParser,vc); } else { Map infoMap = new HashMap(); if ( vc != null ) { // create a mapping of what we know about the variant context, its Chromosome, positions, etc. 
- infoMap.put("CHROM", VariantContextUtils.getLocation(vc).getContig()); - infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(vc).getStart())); + infoMap.put("CHROM", VariantContextUtils.getLocation(genomeLocParser,vc).getContig()); + infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(genomeLocParser,vc).getStart())); infoMap.put("TYPE", vc.getType().toString()); infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual())); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java index 2aad7242e..6d7631a13 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/AllLocusView.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLoc; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; /** * User: hanna @@ -47,7 +48,7 @@ public class AllLocusView extends LocusView { public AllLocusView(LocusShardDataProvider provider) { super( provider ); // Seed the state tracking members with the first possible seek position and the first possible locus context. 
- locusIterator = new GenomeLocusIterator(provider.getLocus()); + locusIterator = new GenomeLocusIterator(genomeLocParser,provider.getLocus()); if( locusIterator.hasNext() ) { // cache next position and next alignment context nextPosition = locusIterator.next(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java index eff273d0e..330a9e4f7 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceView.java @@ -97,9 +97,9 @@ public class LocusReferenceView extends ReferenceView { } if(bounds != null) { - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); + int expandedStart = getWindowStart( bounds ); + int expandedStop = getWindowStop( bounds ); + initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); } } @@ -123,12 +123,12 @@ public class LocusReferenceView extends ReferenceView { if ( loc.getContigIndex() != bounds.getContigIndex() ) throw new ReviewedStingException("Illegal attempt to expand reference view bounds to accommodate location on a different contig."); - bounds = GenomeLocParser.createGenomeLoc(bounds.getContigIndex(), + bounds = genomeLocParser.createGenomeLoc(bounds.getContig(), Math.min(bounds.getStart(),loc.getStart()), Math.max(bounds.getStop(),loc.getStop())); - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); + int expandedStart = getWindowStart( bounds ); + int expandedStop = getWindowStop( bounds ); + 
initializeReferenceSequence(genomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop)); } /** @@ -137,8 +137,8 @@ public class LocusReferenceView extends ReferenceView { */ private void initializeBounds(LocusShardDataProvider provider) { if(provider.getLocus() != null) { - long sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength(); - bounds = GenomeLocParser.createGenomeLoc(provider.getLocus().getContig(), + int sequenceLength = reference.getSequenceDictionary().getSequence(provider.getLocus().getContig()).getSequenceLength(); + bounds = genomeLocParser.createGenomeLoc(provider.getLocus().getContig(), Math.max(provider.getLocus().getStart(),1), Math.min(provider.getLocus().getStop(),sequenceLength)); } @@ -155,10 +155,10 @@ public class LocusReferenceView extends ReferenceView { } protected GenomeLoc trimToBounds(GenomeLoc l) { - long expandedStart = getWindowStart( bounds ); - long expandedStop = getWindowStop( bounds ); - if ( l.getStart() < expandedStart ) l = GenomeLocParser.setStart(l, expandedStart); - if ( l.getStop() > expandedStop ) l = GenomeLocParser.setStop(l, expandedStop); + int expandedStart = getWindowStart( bounds ); + int expandedStop = getWindowStop( bounds ); + if ( l.getStart() < expandedStart ) l = genomeLocParser.setStart(l, expandedStart); + if ( l.getStop() > expandedStop ) l = genomeLocParser.setStop(l, expandedStop); return l; } @@ -186,7 +186,7 @@ public class LocusReferenceView extends ReferenceView { public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) { //validateLocation( genomeLoc ); - GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); + GenomeLoc window = genomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) ); int refStart = -1; if (bounds != null) { @@ -200,7 +200,7 @@ public class LocusReferenceView 
extends ReferenceView { } int len = (int)window.size(); - return new ReferenceContext( genomeLoc, window, new Provider(refStart, len)); + return new ReferenceContext( genomeLocParser, genomeLoc, window, new Provider(refStart, len)); } /** @@ -228,7 +228,7 @@ public class LocusReferenceView extends ReferenceView { * @param locus The locus to expand. * @return The expanded window. */ - private long getWindowStart( GenomeLoc locus ) { + private int getWindowStart( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. if(locus.getStart() < 1) return 1; // if(locus.getStart() < 1) return locus.getStart(); @@ -240,9 +240,9 @@ public class LocusReferenceView extends ReferenceView { * @param locus The locus to expand. * @return The expanded window. */ - private long getWindowStop( GenomeLoc locus ) { + private int getWindowStop( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. 
- long sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); + int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); if(locus.getStop() > sequenceLength) return sequenceLength; return Math.min( locus.getStop() + windowStop, sequenceLength ); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 0fad873ce..75d03856b 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -9,6 +9,7 @@ import org.broadinstitute.sting.gatk.ReadProperties; import java.util.Collection; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * Presents data sharded by locus to the traversal engine. @@ -22,6 +23,11 @@ public class LocusShardDataProvider extends ShardDataProvider { */ private final ReadProperties sourceInfo; + /** + * The parser, used to create and build new GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + /** * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs(). */ @@ -37,9 +43,10 @@ public class LocusShardDataProvider extends ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. 
*/ - public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { - super(shard,reference,rods); + public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection rods) { + super(shard,genomeLocParser,reference,rods); this.sourceInfo = sourceInfo; + this.genomeLocParser = genomeLocParser; this.locus = locus; this.locusIterator = locusIterator; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index 9814217eb..84b5a5463 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Arrays; import java.util.Collection; @@ -33,6 +34,11 @@ public abstract class LocusView extends LocusIterator implements View { */ protected GenomeLoc locus; + /** + * The GenomeLocParser, used to create new genome locs. + */ + protected GenomeLocParser genomeLocParser; + /** * Source info for this view. Informs the class about downsampling requirements. 
*/ @@ -53,6 +59,7 @@ public abstract class LocusView extends LocusIterator implements View { this.locus = provider.getLocus(); this.sourceInfo = provider.getSourceInfo(); + this.genomeLocParser = provider.getGenomeLocParser(); this.loci = provider.getLocusIterator(); seedNextLocus(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index 034472676..f4adcefb9 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -76,7 +76,7 @@ public class ReadBasedReferenceOrderedView implements View { /** stores a window of data, dropping RODs if we've passed the new reads start point. */ class WindowedData { // the queue of possibly in-frame RODs; RODs are removed as soon as they are out of scope - private final TreeMap mapping = new TreeMap(); + private final TreeMap mapping = new TreeMap(); // our current location from the last read we processed private GenomeLoc currentLoc; @@ -109,16 +109,16 @@ class WindowedData { */ private void getStates(ShardDataProvider provider, SAMRecord rec) { - long stop = Integer.MAX_VALUE; + int stop = Integer.MAX_VALUE; // figure out the appropriate alignment stop if (provider.hasReference()) { stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength(); } - + // calculate the range of positions we need to look at - GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), - rec.getAlignmentStart(), - stop); + GenomeLoc range = provider.getGenomeLocParser().createGenomeLoc(rec.getReferenceName(), + rec.getAlignmentStart(), + stop); states = new ArrayList(); if (provider != null && provider.getReferenceOrderedData() != null) for (ReferenceOrderedDataSource dataSource : 
provider.getReferenceOrderedData()) @@ -144,7 +144,7 @@ class WindowedData { */ public ReadMetaDataTracker getTracker(SAMRecord rec) { updatePosition(rec); - return new ReadMetaDataTracker(rec, mapping); + return new ReadMetaDataTracker(provider.getGenomeLocParser(), rec, mapping); } /** @@ -154,7 +154,7 @@ class WindowedData { */ private void updatePosition(SAMRecord rec) { if (states == null) getStates(this.provider, rec); - currentLoc = GenomeLocParser.createGenomeLoc(rec); + currentLoc = provider.getGenomeLocParser().createGenomeLoc(rec); // flush the queue looking for records we've passed over while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart()) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java index 98582ce41..d2c097f5d 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceView.java @@ -67,10 +67,10 @@ public class ReadReferenceView extends ReferenceView { } public ReferenceContext getReferenceContext( SAMRecord read ) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(read); + GenomeLoc loc = genomeLocParser.createGenomeLoc(read); // byte[] bases = super.getReferenceBases(loc); // return new ReferenceContext( loc, loc, bases ); - return new ReferenceContext( loc, loc, getReferenceBasesProvider(loc) ); + return new ReferenceContext( genomeLocParser, loc, loc, getReferenceBasesProvider(loc) ); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java index b6490794b..04120d4e5 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java +++ 
b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import java.util.Collection; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * Present data sharded by read to a traversal engine. @@ -26,8 +27,8 @@ public class ReadShardDataProvider extends ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. */ - public ReadShardDataProvider(Shard shard, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection rods) { - super(shard,reference,rods); + public ReadShardDataProvider(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator reads, IndexedFastaSequenceFile reference, Collection rods) { + super(shard,genomeLocParser,reference,rods); this.reads = reads; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java index 43d122ffb..d6c938f36 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceView.java @@ -28,6 +28,11 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; * A view into the reference backing this shard. */ public class ReferenceView implements View { + /** + * The parser, used to create and parse GenomeLocs. + */ + protected final GenomeLocParser genomeLocParser; + /** * The source of reference data. 
*/ @@ -38,6 +43,7 @@ public class ReferenceView implements View { * @param provider */ public ReferenceView( ShardDataProvider provider ) { + this.genomeLocParser = provider.getGenomeLocParser(); this.reference = provider.getReference(); } @@ -68,7 +74,7 @@ public class ReferenceView implements View { } protected byte[] getReferenceBases( SAMRecord read ) { - return getReferenceBases(GenomeLocParser.createGenomeLoc(read)); + return getReferenceBases(genomeLocParser.createGenomeLoc(read)); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 186a7d335..8a67a5db7 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -80,7 +80,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { // the iterator to immediately before it, so that it can be added to the merging iterator primed for // next() to return the first real ROD in this shard LocationAwareSeekableRODIterator it = dataSource.seek(provider.getShard()); - it.seekForward(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart()-1)); + it.seekForward(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart()-1)); states.add(new ReferenceOrderedDataState(dataSource,it)); @@ -128,7 +128,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { tracker = createTracker(allTracksHere); GenomeLoc rodSite = datum.getLocation(); - GenomeLoc site = GenomeLocParser.createGenomeLoc( rodSite.getContigIndex(), rodSite.getStart(), rodSite.getStart()); + GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart()); if ( DEBUG ) System.out.printf("rodLocusView.next() is at %s%n", site); @@ -167,7 +167,7 @@ public class RodLocusView extends LocusView implements 
ReferenceOrderedView { */ private long getSkippedBases( GenomeLoc currentPos ) { // the minus - is because if lastLoc == null, you haven't yet seen anything in this interval, so it should also be counted as skipped - Long compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop(); + Integer compStop = lastLoc == null ? locus.getStart() - 1 : lastLoc.getStop(); long skippedBases = currentPos.getStart() - compStop - 1; if ( skippedBases < -1 ) { // minus 1 value is ok @@ -182,7 +182,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { * @return */ public GenomeLoc getLocOneBeyondShard() { - return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStop()+1); + return genomeLocParser.createGenomeLoc(locus.getContig(),locus.getStop()+1); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index cb912ccf9..e6f6c9879 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; @@ -37,6 +38,11 @@ public abstract class ShardDataProvider { */ private final Shard shard; + /** + * The parser, used to create and build new GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + /** * Provider of reference data for this particular shard. 
*/ @@ -47,6 +53,14 @@ public abstract class ShardDataProvider { */ private final Collection referenceOrderedData; + /** + * Returns the GenomeLocParser associated with this traversal. + * @return The associated parser. + */ + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * Retrieves the shard associated with this data provider. * @return The shard associated with this data provider. @@ -86,8 +100,9 @@ public abstract class ShardDataProvider { * @param shard The chunk of data over which traversals happen. * @param reference A getter for a section of the reference. */ - public ShardDataProvider(Shard shard,IndexedFastaSequenceFile reference,Collection rods) { + public ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser,IndexedFastaSequenceFile reference,Collection rods) { this.shard = shard; + this.genomeLocParser = genomeLocParser; this.reference = reference; this.referenceOrderedData = rods; } @@ -96,8 +111,8 @@ public abstract class ShardDataProvider { * Skeletal, package protected constructor for unit tests which require a ShardDataProvider. 
* @param shard the shard */ - ShardDataProvider(Shard shard) { - this(shard,null,null); + ShardDataProvider(Shard shard,GenomeLocParser genomeLocParser) { + this(shard,genomeLocParser,null,null); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java index 0c4c08480..5cb41182c 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java @@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; public class IntervalSharder { private static Logger logger = Logger.getLogger(IntervalSharder.class); - public static Iterator shardIntervals(final SAMDataSource dataSource, final List loci) { + public static Iterator shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { return new FilePointerIterator(dataSource,loci); } @@ -55,11 +55,13 @@ public class IntervalSharder { */ private static class FilePointerIterator implements Iterator { final SAMDataSource dataSource; + final GenomeLocSortedSet loci; final PeekableIterator locusIterator; final Queue cachedFilePointers = new LinkedList(); - public FilePointerIterator(final SAMDataSource dataSource, final List loci) { + public FilePointerIterator(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { this.dataSource = dataSource; + this.loci = loci; locusIterator = new PeekableIterator(loci.iterator()); advance(); } @@ -82,7 +84,7 @@ public class IntervalSharder { } private void advance() { - List nextBatch = new ArrayList(); + GenomeLocSortedSet nextBatch = new GenomeLocSortedSet(loci.getGenomeLocParser()); String contig = null; while(locusIterator.hasNext() && nextBatch.isEmpty()) { @@ -99,7 +101,7 @@ public class IntervalSharder { } } - private static List shardIntervalsOnContig(final SAMDataSource dataSource, final 
String contig, final List loci) { + private static List shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final GenomeLocSortedSet loci) { // Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin. List filePointers = new ArrayList(); FilePointer lastFilePointer = null; @@ -171,7 +173,7 @@ public class IntervalSharder { final int regionStop = Math.min(locationStop,binStart-1); - GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop); + GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop); lastFilePointer = new FilePointer(subset); locationStart = regionStop + 1; @@ -184,7 +186,7 @@ public class IntervalSharder { lastBAMOverlap = null; } - GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop); + GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,locationStop); filePointers.add(new FilePointer(subset)); locationStart = locationStop + 1; @@ -195,7 +197,7 @@ public class IntervalSharder { // The start of the region overlaps the bin. Add the overlapping subset. 
final int regionStop = Math.min(locationStop,binStop); - lastFilePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop)); + lastFilePointer.addLocation(loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop)); locationStart = regionStop + 1; } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java index f5003ddb9..3313b992e 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java @@ -58,14 +58,14 @@ public class LocusShardStrategy implements ShardStrategy { * @param reads Data source from which to load index data. * @param locations List of locations for which to load data. */ - LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocSortedSet locations) { + LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) { this.reads = reads; if(!reads.isEmpty()) { - List intervals; + GenomeLocSortedSet intervals; if(locations == null) { // If no locations were passed in, shard the entire BAM file. SAMFileHeader header = reads.getHeader(); - intervals = new ArrayList(); + intervals = new GenomeLocSortedSet(genomeLocParser); for(SAMSequenceRecord readsSequenceRecord: header.getSequenceDictionary().getSequences()) { // Check this sequence against the reference sequence dictionary. 
@@ -73,12 +73,12 @@ public class LocusShardStrategy implements ShardStrategy { SAMSequenceRecord refSequenceRecord = reference.getSequenceDictionary().getSequence(readsSequenceRecord.getSequenceName()); if(refSequenceRecord != null) { final int length = Math.min(readsSequenceRecord.getSequenceLength(),refSequenceRecord.getSequenceLength()); - intervals.add(GenomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length)); + intervals.add(genomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length)); } } } else - intervals = locations.toList(); + intervals = locations; this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals); } @@ -89,15 +89,15 @@ public class LocusShardStrategy implements ShardStrategy { for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) { for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) { final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength()); - filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop))); + filePointers.add(new FilePointer(genomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop))); } } } else { for(GenomeLoc interval: locations) { while(interval.size() > maxShardSize) { - filePointers.add(new FilePointer(GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1))); - interval = GenomeLocParser.createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop()); + filePointers.add(new FilePointer(locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1))); + interval = locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop()); } 
filePointers.add(new FilePointer(interval)); } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java index 18517d1cd..f17441a35 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java @@ -90,7 +90,7 @@ public class ReadShardStrategy implements ShardStrategy { this.locations = locations; if(locations != null) - filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations.toList()); + filePointerIterator = IntervalSharder.shardIntervals(this.dataSource,locations); else filePointerIterator = filePointers.iterator(); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java index 2bc7a3207..9af08c22e 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import net.sf.samtools.SAMSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; @@ -50,8 +51,8 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return a shard strategy capable of dividing input data into shards. 
*/ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) { - return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, -1L); + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser) { + return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, -1L); } /** @@ -64,10 +65,10 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return a shard strategy capable of dividing input data into shards. */ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) { + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, long limitByCount) { switch (strat) { case LOCUS_EXPERIMENTAL: - return new LocusShardStrategy(readsDataSource,referenceDataSource,null); + return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null); case READS_EXPERIMENTAL: return new ReadShardStrategy(readsDataSource,null); default: @@ -87,8 +88,8 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return a shard strategy capable of dividing input data into shards. 
*/ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) { - return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, lst, -1l); + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst) { + return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, lst, -1l); } @@ -102,10 +103,10 @@ public class ShardStrategyFactory { * @param startingSize the starting size * @return A strategy for shattering this data. */ - static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) { + static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst, long limitDataCount) { switch (strat) { case LOCUS_EXPERIMENTAL: - return new LocusShardStrategy(readsDataSource,referenceDataSource,lst); + return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst); case READS_EXPERIMENTAL: return new ReadShardStrategy(readsDataSource,lst); default: diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index b6373738a..c652f32cf 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java 
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.FeatureSource; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; @@ -10,6 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -47,12 +49,12 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { * Create a new reference-ordered data source. 
* @param rod the reference ordered data */ - public ReferenceOrderedDataSource( RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataSource(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { this.rod = rod; if (rod.supportsQuery()) - iteratorPool = new ReferenceOrderedQueryDataPool(new RMDTrackBuilder(),rod); + iteratorPool = new ReferenceOrderedQueryDataPool(sequenceDictionary,genomeLocParser,new RMDTrackBuilder(),rod); else - iteratorPool = new ReferenceOrderedDataPool( rod, flashbackData ); + iteratorPool = new ReferenceOrderedDataPool(sequenceDictionary,genomeLocParser,rod, flashbackData ); } /** @@ -110,7 +112,8 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { class ReferenceOrderedDataPool extends ResourcePool { private final RMDTrack rod; boolean flashbackData = false; - public ReferenceOrderedDataPool( RMDTrack rod, boolean flashbackData ) { + public ReferenceOrderedDataPool( SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser, RMDTrack rod, boolean flashbackData ) { + super(sequenceDictionary,genomeLocParser); this.flashbackData = flashbackData; this.rod = rod; } @@ -121,7 +124,7 @@ class ReferenceOrderedDataPool extends ResourcePool resources ) { if(segment instanceof MappedStreamSegment) { - GenomeLoc position = ((MappedStreamSegment)segment).getFirstLocation(); + GenomeLoc position = ((MappedStreamSegment)segment).getLocation(); for( LocationAwareSeekableRODIterator RODIterator : resources ) { @@ -178,14 +181,14 @@ class ReferenceOrderedDataPool extends ResourcePool { - // the reference-ordered data itself. 
private final RMDTrack rod; // our tribble track builder private final RMDTrackBuilder builder; - public ReferenceOrderedQueryDataPool( RMDTrackBuilder builder, RMDTrack rod ) { + public ReferenceOrderedQueryDataPool( SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, RMDTrackBuilder builder, RMDTrack rod ) { + super(sequenceDictionary,genomeLocParser); this.rod = rod; this.builder = builder; // a little bit of a hack, but it saves us from re-reading the index from the file @@ -209,9 +212,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool { + /** + * Sequence dictionary. + */ + protected final SAMSequenceDictionary sequenceDictionary; + + /** + * Builder/parser for GenomeLocs. + */ + protected final GenomeLocParser genomeLocParser; + /** * All iterators of this reference-ordered data. */ @@ -41,6 +52,11 @@ abstract class ResourcePool { */ private Map resourceAssignments = new HashMap(); + protected ResourcePool(SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser) { + this.sequenceDictionary = sequenceDictionary; + this.genomeLocParser = genomeLocParser; + } + /** * Get an iterator whose position is before the specified location. Create a new one if none exists. * @param segment Target position for the iterator. @@ -180,36 +196,11 @@ class MappedStreamSegment implements DataStreamSegment { * Retrieves the first location covered by a mapped stream segment. * @return Location of the first base in this segment. */ - public GenomeLoc getFirstLocation() { - return GenomeLocParser.createGenomeLoc(locus.getContigIndex(),locus.getStart()); + public GenomeLoc getLocation() { + return locus; } public MappedStreamSegment(GenomeLoc locus) { this.locus = locus; } } - -/** - * Models a position within the unmapped reads in a stream of GATK input data. - */ -class UnmappedStreamSegment implements DataStreamSegment { - /** - * Where does this region start, given 0 = the position of the first unmapped read. 
- */ - public final long position; - - /** - * How many reads wide is this region? This size is generally treated as an upper bound. - */ - public final long size; - - /** - * Create a new target location in an unmapped read stream. - * @param position The 0-based index into the unmapped reads. Position 0 represents the first unmapped read. - * @param size the size of the segment. - */ - public UnmappedStreamSegment( long position, long size ) { - this.position = position; - this.size = size; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 1c063230d..61f137fb4 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -43,6 +43,7 @@ import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -63,17 +64,22 @@ public class SAMDataSource implements SimpleDataSource { /** * Runtime metrics of reads filtered, etc. */ - protected final ReadMetrics readMetrics; + private final ReadMetrics readMetrics; + + /** + * Tools for parsing GenomeLocs, for verifying BAM ordering against general ordering. + */ + private final GenomeLocParser genomeLocParser; /** * Identifiers for the readers driving this data source. */ - protected final List readerIDs; + private final List readerIDs; /** * How strict are the readers driving this data source. 
*/ - protected final SAMFileReader.ValidationStringency validationStringency; + private final SAMFileReader.ValidationStringency validationStringency; /** * How far along is each reader? @@ -113,9 +119,10 @@ public class SAMDataSource implements SimpleDataSource { * Create a new SAM data source given the supplied read metadata. * @param samFiles list of reads files. */ - public SAMDataSource(List samFiles) { + public SAMDataSource(List samFiles,GenomeLocParser genomeLocParser) { this( samFiles, + genomeLocParser, false, SAMFileReader.ValidationStringency.STRICT, null, @@ -145,6 +152,7 @@ public class SAMDataSource implements SimpleDataSource { */ public SAMDataSource( List samFiles, + GenomeLocParser genomeLocParser, boolean useOriginalBaseQualities, SAMFileReader.ValidationStringency strictness, Integer readBufferSize, @@ -155,6 +163,7 @@ public class SAMDataSource implements SimpleDataSource { boolean generateExtendedEvents ) { this.readMetrics = new ReadMetrics(); + this.genomeLocParser = genomeLocParser; readerIDs = samFiles; validationStringency = strictness; @@ -520,7 +529,7 @@ public class SAMDataSource implements SimpleDataSource { // unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification, // verify the read ordering by applying a sort order iterator if (!noValidationOfReadOrder && enableVerification) - wrappedIterator = new VerifyingSamIterator(wrappedIterator); + wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator); wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters)); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 1da0cdeed..4290b4960 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ 
b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -53,9 +53,9 @@ public class LinearMicroScheduler extends MicroScheduler { // New experimental code for managing locus intervals. if(shard.getShardType() == Shard.ShardType.LOCUS) { LocusWalker lWalker = (LocusWalker)walker; - WindowMaker windowMaker = new WindowMaker(shard, getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards()); + WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards()); for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,reference,rods); + ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); @@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler { windowMaker.close(); } else { - ShardDataProvider dataProvider = new ReadShardDataProvider(shard,getReadIterator(shard),reference,rods); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); accumulator.accumulate(dataProvider,result); dataProvider.close(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 74c5ff6f6..de7623de6 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -158,6 +158,12 @@ public abstract class 
MicroScheduler { traversalEngine.printOnTraversalDone(metrics); } + /** + * Gets the engine that created this microscheduler. + * @return The engine owning this microscheduler. + */ + public GenomeAnalysisEngine getEngine() { return engine; } + /** * Returns data source maintained by this scheduler * @return diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 62788737c..cbd3e822d 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -61,11 +61,11 @@ public class ShardTraverser implements Callable { Object accumulator = walker.reduceInit(); LocusWalker lWalker = (LocusWalker)walker; - WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards()); + WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards()); ShardDataProvider dataProvider = null; for(WindowMaker.WindowMakerIterator iterator: windowMaker) { - dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); + dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); dataProvider.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index b8afd937b..5172079af 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ 
-9,6 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; import net.sf.picard.util.PeekableIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; /** * Buffer shards of data which may or may not contain multiple loci into @@ -51,11 +52,11 @@ public class WindowMaker implements Iterable, I * @param intervals The set of intervals over which to traverse. * @param discards a filter at that indicates read position relative to some locus? */ - public WindowMaker(Shard shard, StingSAMIterator iterator, List intervals, List discards ) { + public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List intervals, List discards ) { this.sourceInfo = shard.getReadProperties(); this.readIterator = iterator; - LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,discards); + LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,discards); this.sourceIterator = new PeekableIterator(locusIterator); this.intervalIterator = intervals.size()>0 ? new PeekableIterator(intervals.iterator()) : null; diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java index a078d9d4e..bc45cc75b 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/GenomeLocusIterator.java @@ -22,6 +22,11 @@ import java.util.Iterator; * Iterates through all of the loci provided in the reference. */ public class GenomeLocusIterator implements Iterator { + /** + * Builds individual loci. + */ + private GenomeLocParser parser; + /** * The entire region over which we're iterating. */ @@ -38,9 +43,10 @@ public class GenomeLocusIterator implements Iterator { * @param completeLocus Data provider to use as a backing source. * Provider must have a reference (hasReference() == true). 
*/ - public GenomeLocusIterator( GenomeLoc completeLocus ) { + public GenomeLocusIterator( GenomeLocParser parser, GenomeLoc completeLocus ) { + this.parser = parser; this.completeLocus = completeLocus; - this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart()); + this.currentLocus = parser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart()); } /** @@ -59,7 +65,7 @@ public class GenomeLocusIterator implements Iterator { if( !hasNext() ) throw new NoSuchElementException("No elements remaining in bounded reference region."); GenomeLoc toReturn = (GenomeLoc)currentLocus.clone(); - currentLocus = GenomeLocParser.incPos(currentLocus); + currentLocus = parser.incPos(currentLocus); return toReturn; } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index f8e8cfe58..659884526 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -61,6 +61,11 @@ public class LocusIteratorByState extends LocusIterator { // ----------------------------------------------------------------------------------------------------------------- private boolean hasExtendedEvents = false; // will be set to true if at least one read had an indel right before the current position + /** + * Used to create new GenomeLocs. 
+ */ + private final GenomeLocParser genomeLocParser; + private final Collection sampleNames = new ArrayList(); private final ReadStateManager readStates; @@ -129,8 +134,8 @@ public class LocusIteratorByState extends LocusIterator { public int getGenomePosition() { return read.getAlignmentStart() + getGenomeOffset(); } - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); + public GenomeLoc getLocation(GenomeLocParser genomeLocParser) { + return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition()); } public CigarOperator getCurrentCigarOperator() { @@ -268,12 +273,13 @@ public class LocusIteratorByState extends LocusIterator { // constructors and other basic operations // // ----------------------------------------------------------------------------------------------------------------- - public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation ) { - this(samIterator, readInformation, NO_FILTERS); + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser ) { + this(samIterator, readInformation, genomeLocParser, NO_FILTERS); } - public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, List filters ) { + public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, List filters ) { this.readInfo = readInformation; + this.genomeLocParser = genomeLocParser; this.filters = filters; // Aggregate all sample names. sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader())); @@ -310,7 +316,7 @@ public class LocusIteratorByState extends LocusIterator { } private GenomeLoc getLocation() { - return readStates.isEmpty() ? null : readStates.getFirst().getLocation(); + return readStates.isEmpty() ? 
null : readStates.getFirst().getLocation(genomeLocParser); } // ----------------------------------------------------------------------------------------------------------------- @@ -354,7 +360,7 @@ public class LocusIteratorByState extends LocusIterator { SAMRecordState our1stState = readStates.getFirst(); // get current location on the reference and decrement it by 1: the indels we just stepped over // are associated with the *previous* reference base - GenomeLoc loc = GenomeLocParser.incPos(our1stState.getLocation(),-1); + GenomeLoc loc = genomeLocParser.incPos(our1stState.getLocation(genomeLocParser),-1); boolean hasBeenSampled = false; for(String sampleName: sampleNames) { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java index 804055140..e5a561176 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java @@ -16,11 +16,13 @@ import java.util.Iterator; * To change this template use File | Settings | File Templates. */ public class VerifyingSamIterator implements StingSAMIterator { + private GenomeLocParser genomeLocParser; StingSAMIterator it; SAMRecord last = null; boolean checkOrderP = true; - public VerifyingSamIterator(StingSAMIterator it) { + public VerifyingSamIterator(GenomeLocParser genomeLocParser,StingSAMIterator it) { + this.genomeLocParser = genomeLocParser; this.it = it; } @@ -35,27 +37,19 @@ public class VerifyingSamIterator implements StingSAMIterator { return cur; } - /** - * If true, enables ordered checking of the reads in the file. By default this is enabled. 
- * @param checkP If true, sam records will be checked to insure they come in order - */ - public void setCheckOrderP( boolean checkP ) { - checkOrderP = checkP; - } - - public void verifyRecord( final SAMRecord last, final SAMRecord cur ) { + private void verifyRecord( final SAMRecord last, final SAMRecord cur ) { if ( checkOrderP && isOutOfOrder(last, cur) ) { this.last = null; throw new RuntimeIOException(String.format("Reads are out of order:%nlast:%n%s%ncurrent:%n%s%n", last.format(), cur.format()) ); } } - public static boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) { + private boolean isOutOfOrder( final SAMRecord last, final SAMRecord cur ) { if ( last == null || cur.getReadUnmappedFlag() ) return false; else { - GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( last ); - GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur ); + GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last ); + GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur ); return curLoc.compareTo(lastLoc) == -1; } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java index 7a497b9a9..090022269 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java @@ -43,10 +43,15 @@ import java.util.TreeMap; * a read-based meta data tracker */ public class ReadMetaDataTracker { + /** + * The parser, used to create new GenomeLocs. 
+ */ + private final GenomeLocParser genomeLocParser; + private final SAMRecord record; // the buffer of positions and RODs we've stored - private final TreeMap mapping; + private final TreeMap mapping; /** * create a read meta data tracker, given the read and a queue of RODatum positions @@ -54,7 +59,8 @@ public class ReadMetaDataTracker { * @param record the read to create offset from * @param mapping the mapping of reference ordered datum */ - public ReadMetaDataTracker(SAMRecord record, TreeMap mapping) { + public ReadMetaDataTracker(GenomeLocParser genomeLocParser, SAMRecord record, TreeMap mapping) { + this.genomeLocParser = genomeLocParser; this.record = record; this.mapping = mapping; } @@ -69,13 +75,13 @@ public class ReadMetaDataTracker { * * @return a mapping from the position in the read to the reference ordered datum */ - private Map> createReadAlignment(SAMRecord record, TreeMap queue, Class cl, String name) { + private Map> createReadAlignment(SAMRecord record, TreeMap queue, Class cl, String name) { if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified"); - Map> ret = new LinkedHashMap>(); - GenomeLoc location = GenomeLocParser.createGenomeLoc(record); + Map> ret = new LinkedHashMap>(); + GenomeLoc location = genomeLocParser.createGenomeLoc(record); int length = record.getReadLength(); - for (Long loc : queue.keySet()) { - Long position = loc - location.getStart(); + for (Integer loc : queue.keySet()) { + Integer position = loc - location.getStart(); if (position >= 0 && position < length) { Collection set; if (cl != null) @@ -95,11 +101,11 @@ public class ReadMetaDataTracker { * * @return a mapping from the position in the read to the reference ordered datum */ - private Map> createGenomeLocAlignment(SAMRecord record, TreeMap mapping, Class cl, String name) { - Map> ret = new LinkedHashMap>(); + private Map> createGenomeLocAlignment(SAMRecord record, TreeMap mapping, Class cl, String name) { + 
Map> ret = new LinkedHashMap>(); int start = record.getAlignmentStart(); int stop = record.getAlignmentEnd(); - for (Long location : mapping.keySet()) { + for (Integer location : mapping.keySet()) { if (location >= start && location <= stop) if (cl != null) ret.put(location, mapping.get(location).getSet(cl)); @@ -114,7 +120,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getReadOffsetMapping() { + public Map> getReadOffsetMapping() { return createReadAlignment(record, mapping, null, null); } @@ -123,7 +129,7 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getContigOffsetMapping() { + public Map> getContigOffsetMapping() { return createGenomeLocAlignment(record, mapping, null, null); } @@ -132,7 +138,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getReadOffsetMapping(String name) { + public Map> getReadOffsetMapping(String name) { return createReadAlignment(record, mapping, null, name); } @@ -141,7 +147,7 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getContigOffsetMapping(String name) { + public Map> getContigOffsetMapping(String name) { return createGenomeLocAlignment(record, mapping, null, name); } @@ -150,7 +156,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getReadOffsetMapping(Class cl) { + public Map> getReadOffsetMapping(Class cl) { return createReadAlignment(record, mapping, cl, null); } @@ -159,7 +165,7 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getContigOffsetMapping(Class cl) { + public Map> getContigOffsetMapping(Class cl) { return createGenomeLocAlignment(record, mapping, cl, null); } } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java 
b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java similarity index 58% rename from java/test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java rename to java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java index 89b527a38..b4427c228 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserTestUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceDependentFeatureCodec.java @@ -22,18 +22,21 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils; +package org.broadinstitute.sting.gatk.refdata; + +import org.broad.tribble.FeatureCodec; +import org.broadinstitute.sting.utils.GenomeLocParser; /** - * A suite of utilities for working with the GenomeLocParser - * in the context of the sequence dictionary. + * An interface marking that a given Tribble feature/codec is actually dependent on context within the + * reference, rather than having a dependency only on the contig, start, and stop of the given feature. + * A HACK. Tribble should contain all the information it needs to decode the unqualified position of + * a feature. */ -public class GenomeLocParserTestUtils { +public interface ReferenceDependentFeatureCodec extends FeatureCodec { /** - * Clear out the sequence dictionary associated with - * the genomeloc creator. + * Sets the appropriate GenomeLocParser, providing additional context when decoding larger and more variable features. + * @param genomeLocParser The parser to supply. 
*/ - public static void clearSequenceDictionary() { - GenomeLocParser.clearRefContigOrdering(); - } + public void setGenomeLocParser(GenomeLocParser genomeLocParser); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index c995f53bc..96f78ad09 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.refdata; +import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; @@ -38,21 +39,26 @@ import java.util.List; * To change this template use File | Settings | File Templates. */ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { + /** + * The parser, used to construct new genome locs. + */ + private final GenomeLocParser parser; + private PushbackIterator it; List records = null; // here we will keep a pile of records overlaping with current position; when we iterate // and step out of record's scope, we purge it from the list String name = null; // name of the ROD track wrapped by this iterator. Will be pulled from underlying iterator. - long curr_position = 0; // where the iterator is currently positioned on the genome - long max_position = 0; // the rightmost stop position of currently loaded records - int curr_contig = -1; // what contig the iterator is currently on + int curr_position = 0; // where the iterator is currently positioned on the genome + int max_position = 0; // the rightmost stop position of currently loaded records + String curr_contig = null; // what contig the iterator is currently on boolean next_is_allowed = true; // see discussion below. 
next() is illegal after seek-forward queries of length > 1 // the stop position of the last query. We can query only in forward direction ("seek forward"); // it is not only the start position of every successive query that can not be before the start // of the previous one (curr_start), but it is also illegal for a query interval to *end* before // the end of previous query, otherwise we can end up in an inconsistent state - long curr_query_end = -1; + int curr_query_end = -1; // EXAMPLE of inconsistency curr_query_end guards against: // record 1 record 2 @@ -80,7 +86,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1, // but re-enables next() again after a length-1 query. - public SeekableRODIterator(CloseableIterator it) { + public SeekableRODIterator(SAMSequenceDictionary dictionary,GenomeLocParser parser,CloseableIterator it) { + this.parser = parser; this.it = new PushbackIterator(it); records = new LinkedList(); // the following is a trick: we would like the iterator to know the actual name assigned to @@ -91,6 +98,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { GATKFeature r = null; if (this.it.hasNext()) r = this.it.element(); name = (r==null?null:r.getName()); + + curr_contig = dictionary.getSequence(0).getSequenceName(); } /** @@ -111,14 +120,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // Returns point location (i.e. genome loc of length 1) on the reference, to which this iterator will advance // upon next call to next(). 
public GenomeLoc peekNextLocation() { - if ( curr_position + 1 <= max_position ) return GenomeLocParser.createGenomeLoc(curr_contig,curr_position+1); + if ( curr_position + 1 <= max_position ) return parser.createGenomeLoc(curr_contig,curr_position+1); // sorry, next reference position is not covered by the RODs we are currently holding. In this case, // the location we will jump to upon next call to next() is the start of the next ROD record that we did // not read yet: if ( it.hasNext() ) { GATKFeature r = it.element(); // peek, do not load! - return GenomeLocParser.createGenomeLoc(r.getLocation().getContigIndex(),r.getLocation().getStart()); + return parser.createGenomeLoc(r.getLocation().getContig(),r.getLocation().getStart()); } return null; // underlying iterator has no more records, there is no next location! } @@ -147,7 +156,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { records.clear(); GATKFeature r = it.next(); // if hasNext() previously returned true, we are guaranteed that this call to reader.next() is safe records.add( r ); - curr_contig = r.getLocation().getContigIndex(); + curr_contig = r.getLocation().getContig(); curr_position = r.getLocation().getStart(); max_position = r.getLocation().getStop(); } @@ -163,11 +172,14 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { it.next(); continue; } - int that_contig = r.getLocation().getContigIndex(); - if ( curr_contig > that_contig ) + + GenomeLoc currentContig = parser.createOverEntireContig(curr_contig); + GenomeLoc thatContig = r.getLocation(); + + if ( currentContig.isPast(thatContig) ) throw new UserException("LocationAwareSeekableRODIterator: contig " +r.getLocation().getContig() + " occurs out of order in track " + r.getName() ); - if ( curr_contig < that_contig ) break; // next record is on a higher contig, we do not need it yet... 
+ if ( currentContig.isBefore(thatContig) ) break; // next record is on a higher contig, we do not need it yet... if ( r.getLocation().getStart() < curr_position ) throw new UserException("LocationAwareSeekableRODIterator: track "+r.getName() + @@ -177,7 +189,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { r = it.next(); // we got here only if we do need next record, time to load it for real - long stop = r.getLocation().getStop(); + int stop = r.getLocation().getStop(); if ( stop < curr_position ) throw new ReviewedStingException("DEBUG: encountered contig that should have been loaded earlier"); // this should never happen if ( stop > max_position ) max_position = stop; // max_position keeps the rightmost stop position across all loaded records records.add(r); @@ -186,7 +198,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // 'records' and current position are fully updated. Last, we need to set the location of the whole track // (collection of ROD records) to the genomic site we are currently looking at, and return the list - return new RODRecordListImpl(name,records, GenomeLocParser.createGenomeLoc(curr_contig,curr_position)); + return new RODRecordListImpl(name,records, parser.createGenomeLoc(curr_contig,curr_position)); } /** @@ -218,13 +230,13 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { * @return Current ending position of the iterator, or null if no position exists. 
*/ public GenomeLoc position() { - if ( curr_contig < 0 ) return null; + if ( curr_contig == null ) return null; if ( curr_query_end > curr_position ) { // do not attempt to reuse this iterator if the position we need it for lies before the end of last query performed - return GenomeLocParser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end); + return parser.createGenomeLoc(curr_contig,curr_query_end,curr_query_end); } else { - return GenomeLocParser.createGenomeLoc(curr_contig,curr_position); + return parser.createGenomeLoc(curr_contig,curr_position); } } @@ -256,10 +268,11 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { */ public RODRecordList seekForward(GenomeLoc interval) { - if ( interval.getContigIndex() < curr_contig ) + if ( interval.isBefore(parser.createOverEntireContig(curr_contig)) && + !(interval.getStart() == 0 && interval.getStop() == 0 && interval.getContig().equals(curr_contig)) ) // This criteria is syntactic sugar for 'seek to right before curr_contig' throw new ReviewedStingException("Out of order query: query contig "+interval.getContig()+" is located before "+ "the iterator's current contig"); - if ( interval.getContigIndex() == curr_contig ) { + if ( interval.getContig().equals(curr_contig) ) { if ( interval.getStart() < curr_position ) throw new ReviewedStingException("Out of order query: query position "+interval +" is located before "+ "the iterator's current position "+curr_contig + ":" + curr_position); @@ -273,7 +286,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { next_is_allowed = ( curr_position == curr_query_end ); // we can call next() later only if interval length is 1 - if ( interval.getContigIndex() == curr_contig && curr_position <= max_position ) { + if ( interval.getContig().equals(curr_contig) && curr_position <= max_position ) { // some of the intervals we are currently keeping do overlap with the query interval purgeOutOfScopeRecords(); @@ 
-281,7 +294,7 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { // clean up and get ready for fast-forwarding towards the requested position records.clear(); max_position = -1; - curr_contig = interval.getContigIndex(); + curr_contig = interval.getContig(); } // curr_contig and curr_position are set to where we asked to scroll to @@ -289,10 +302,12 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { while ( it.hasNext() ) { GATKFeature r = it.next(); if ( r == null ) continue; - int that_contig = r.getLocation().getContigIndex(); - if ( curr_contig > that_contig ) continue; // did not reach requested contig yet - if ( curr_contig < that_contig ) { + GenomeLoc currentContig = parser.createOverEntireContig(curr_contig); + GenomeLoc thatContig = r.getLocation(); + + if ( currentContig.isPast(thatContig) ) continue; // did not reach requested contig yet + if ( currentContig.isBefore(thatContig) ) { it.pushback(r); // next record is on the higher contig, we do not need it yet... 
break; } @@ -340,4 +355,5 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { public void close() { if (this.it != null) ((CloseableIterator)this.it.getUnderlyingIterator()).close(); } + } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 3fc76c8a5..bbcce6677 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -181,7 +181,7 @@ public class VariantContextAdaptors { // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); alleles.add(refAllele); - VariantContext vc = VariantContextUtils.toVC(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); + VariantContext vc = VariantContextUtils.toVC(name, ref.getGenomeLocParser().createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); return vc; } else return null; // can't handle anything else diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java index a42e103d6..e87986a2e 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java @@ -33,15 +33,15 @@ import java.util.StringTokenizer; import org.apache.log4j.Logger; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; +import 
org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; -public class AnnotatorInputTableCodec implements FeatureCodec { +public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); @@ -49,6 +49,19 @@ public class AnnotatorInputTableCodec implements FeatureCodec header; + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + /** * Parses the header. * @@ -80,9 +93,9 @@ public class AnnotatorInputTableCodec implements FeatureCodec { +public class BeagleCodec implements ReferenceDependentFeatureCodec { private String[] header; public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2}; private BeagleReaderType readerType; @@ -52,6 +52,19 @@ public class BeagleCodec implements FeatureCodec { private static final String delimiterRegex = "\\s+"; + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. 
+ */ + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + public Feature decodeLoc(String line) { return decode(line); } @@ -147,17 +160,6 @@ public class BeagleCodec implements FeatureCodec { private static Pattern MARKER_PATTERN = Pattern.compile("(.+):([0-9]+)"); - private static GenomeLoc parseMarkerName(String markerName) { - Matcher m = MARKER_PATTERN.matcher(markerName); - if ( m.matches() ) { - String contig = m.group(1); - long start = Long.valueOf(m.group(2)); - return GenomeLocParser.createGenomeLoc(contig, start, start); - } else { - throw new IllegalArgumentException("Malformatted marker string: " + markerName + " required format is chrN:position"); - } - } - @Override public Class getFeatureType() { return BeagleFeature.class; @@ -175,7 +177,7 @@ public class BeagleCodec implements FeatureCodec { BeagleFeature bglFeature = new BeagleFeature(); - final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this + final GenomeLoc loc = genomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this //parse the location: common to all readers bglFeature.setChr(loc.getContig()); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java index 9f7c2709f..9e9024e65 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.refdata.features.refseq; import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; +import 
org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -13,7 +13,21 @@ import java.util.ArrayList; /** * the ref seq codec */ -public class RefSeqCodec implements FeatureCodec { +public class RefSeqCodec implements ReferenceDependentFeatureCodec { + + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + @Override + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } @Override public Feature decodeLoc(String line) { @@ -21,19 +35,19 @@ public class RefSeqCodec implements FeatureCodec { String fields[] = line.split("\t"); if (fields.length < 3) throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least 3 columns, we saw " + fields.length); String contig_name = fields[2]; - return new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])); + return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); } /** Fills this object from a text line in RefSeq (UCSC) text dump file */ @Override - public Feature decode(String line) { + public RefSeqFeature decode(String line) { if (line.startsWith("#")) return null; String fields[] = line.split("\t"); // we reference postion 15 in the split array below, make sure we have at least that many columns if (fields.length < 16) throw new TribbleException("RefSeq (decode) : Unable to parse line -> " + line + ", we expected at least 16 columns, we saw " + fields.length); String contig_name = fields[2]; - RefSeqFeature feature = new RefSeqFeature(contig_name, 
Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])); + RefSeqFeature feature = new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); feature.setTranscript_id(fields[1]); if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1); @@ -41,8 +55,8 @@ public class RefSeqCodec implements FeatureCodec { else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line); - feature.setTranscript_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); - feature.setTranscript_coding_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]))); + feature.setTranscript_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + feature.setTranscript_coding_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]))); feature.setGene_name(fields[12]); String[] exon_starts = fields[9].split(","); String[] exon_stops = fields[10].split(","); @@ -57,7 +71,7 @@ public class RefSeqCodec implements FeatureCodec { ArrayList exon_frames = new ArrayList(eframes.length); for ( int i = 0 ; i < exon_starts.length ; i++ ) { - exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); + exons.add(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); exon_frames.add(Integer.decode(eframes[i])); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index af895b262..19fe064ce 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ 
b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -25,15 +25,8 @@ public class RefSeqFeature implements Transcript, Feature { private List exon_frames; private String name; - // store the contig, start, and stop for this record - private final String contig; - private final int start; - private final int stop; - - public RefSeqFeature(String contig, int start, int stop) { - this.contig = contig; - this.start = start; - this.stop = stop; + public RefSeqFeature(GenomeLoc genomeLoc) { + this.transcript_interval = genomeLoc; } /** Returns id of the transcript (RefSeq NM_* id) */ @@ -44,8 +37,6 @@ public class RefSeqFeature implements Transcript, Feature { /** Returns transcript's full genomic interval (includes all exons with UTRs) */ public GenomeLoc getLocation() { - if (transcript_interval == null) - transcript_interval = GenomeLocParser.parseGenomeLoc(contig,start,stop); return transcript_interval; } @@ -270,16 +261,16 @@ public class RefSeqFeature implements Transcript, Feature { @Override public String getChr() { - return contig; + return transcript_interval.getContig(); } @Override public int getStart() { - return start; + return transcript_interval.getStart(); } @Override public int getEnd() { - return stop; + return transcript_interval.getStop(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java index c24eb94e6..a930e825f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.refdata.features.table; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import 
org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -14,12 +15,27 @@ import java.util.*; /** * implementation of a simple table (tab or comma delimited format) input files... more improvements to come */ -public class TableCodec implements FeatureCodec { +public class TableCodec implements ReferenceDependentFeatureCodec { private String delimiterRegex = "\\s+"; private String headerDelimiter = "HEADER"; private String commentDelimiter = "#"; private ArrayList header = new ArrayList(); + /** + * The parser to use when resolving genome-wide locations. + */ + private GenomeLocParser genomeLocParser; + + /** + * Set the parser to use when resolving genetic data. + * @param genomeLocParser The supplied parser. + */ + @Override + public void setGenomeLocParser(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } + + @Override public Feature decodeLoc(String line) { return decode(line); @@ -34,7 +50,7 @@ public class TableCodec implements FeatureCodec { throw new IllegalArgumentException("TableCodec line = " + line + " doesn't appear to be a valid table format"); - return new TableFeature(GenomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header); + return new TableFeature(genomeLocParser.parseGenomeLoc(split[0]),Arrays.asList(split),header); } @Override diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java index 7956e4469..ab11b9333 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrack.java @@ -30,6 +30,7 @@ import org.broad.tribble.FeatureSource; import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import 
org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; @@ -59,6 +60,11 @@ public class RMDTrack { // our sequence dictionary, which can be null private final SAMSequenceDictionary dictionary; + /** + * Parser to use when creating/parsing GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + // our codec type private final FeatureCodec codec; @@ -101,13 +107,14 @@ public class RMDTrack { * @param dict the sam sequence dictionary * @param codec the feature codec we use to decode this type */ - public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) { + public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { this.type = type; this.recordType = codec.getFeatureType(); this.name = name; this.file = file; this.reader = reader; this.dictionary = dict; + this.genomeLocParser = genomeLocParser; this.codec = codec; } @@ -117,7 +124,7 @@ public class RMDTrack { */ public CloseableIterator getIterator() { try { - return new FeatureToGATKFeatureIterator(reader.iterator(),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.iterator(),this.getName()); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(getFile(), "Unable to read from file", e); } @@ -133,19 +140,19 @@ public class RMDTrack { } public CloseableIterator query(GenomeLoc interval) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); } public CloseableIterator query(GenomeLoc interval, boolean 
contained) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),(int)interval.getStart(),(int)interval.getStop()),this.getName()); } public CloseableIterator query(String contig, int start, int stop) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName()); } public CloseableIterator query(String contig, int start, int stop, boolean contained) throws IOException { - return new FeatureToGATKFeatureIterator(reader.query(contig,start,stop),this.getName()); + return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName()); } public void close() { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java index 98ab07a84..b3bd6ef86 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java @@ -35,11 +35,13 @@ import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.source.CachingFeatureSource; import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine; import 
org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -80,18 +82,34 @@ public class RMDTrackBuilder extends PluginManager { // private sequence dictionary we use to set our tracks with private SAMSequenceDictionary dict = null; + /** + * Private genome loc parser to use when building out new locs. + */ + private GenomeLocParser genomeLocParser; + /** Create a new plugin manager. */ public RMDTrackBuilder() { super(FeatureCodec.class, "Codecs", "Codec"); } + /** + * Create a new RMDTrackBuilder, with dictionary and genomeLocParser predefined. + * @param dict + * @param genomeLocParser + */ + public RMDTrackBuilder(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) { + super(FeatureCodec.class, "Codecs", "Codec"); + setSequenceDictionary(dict,genomeLocParser); + } + /** * * @param dict the sequence dictionary to use as a reference for Tribble track contig length lookups */ - public void setSequenceDictionary(SAMSequenceDictionary dict) { + public void setSequenceDictionary(SAMSequenceDictionary dict,GenomeLocParser genomeLocParser) { this.dict = dict; - } + this.genomeLocParser = genomeLocParser; + } /** @return a list of all available track types we currently have access to create */ public Map getAvailableTrackNamesAndTypes() { @@ -115,6 +133,7 @@ public class RMDTrackBuilder extends PluginManager { /** * create a RMDTrack of the specified type * + * @param genomeLocParser GenomeLocParser to use, if case track needs additional reference context. 
* @param targetClass the target class of track * @param name what to call the track * @param inputFile the input file @@ -127,7 +146,7 @@ public class RMDTrackBuilder extends PluginManager { // return a feature reader track Pair pair = createFeatureReader(targetClass, name, inputFile); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name)); + return new RMDTrack(targetClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(targetClass,name)); } /** @@ -186,6 +205,8 @@ public class RMDTrackBuilder extends PluginManager { FeatureCodec codex = this.createByType(targetClass); if ( codex instanceof NameAwareCodec ) ((NameAwareCodec)codex).setName(name); + if(codex instanceof ReferenceDependentFeatureCodec) + ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); return codex; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index 649ffeb24..462bf98df 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.Feature; import org.broad.tribble.iterators.CloseableTribbleIterator; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; @@ -39,10 +40,12 @@ import java.util.Iterator; * a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs) */ public class FeatureToGATKFeatureIterator implements CloseableIterator { + private final GenomeLocParser genomeLocParser; private 
final CloseableTribbleIterator iterator; private final String name; - public FeatureToGATKFeatureIterator(CloseableTribbleIterator iter, String name) { + public FeatureToGATKFeatureIterator(GenomeLocParser genomeLocParser,CloseableTribbleIterator iter, String name) { + this.genomeLocParser = genomeLocParser; this.name = name; this.iterator = iter; } @@ -54,7 +57,7 @@ public class FeatureToGATKFeatureIterator implements CloseableIterator { + private GenomeLocParser genomeLocParser; + private PushbackIterator it = null; public enum FORMAT { BED, GATK }; FORMAT myFormat = FORMAT.GATK; - public StringToGenomeLocIteratorAdapter(Iterator it, FORMAT format) { + public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator it, FORMAT format) { + this.genomeLocParser = genomeLocParser; this.it = new PushbackIterator(it); myFormat = format; } - public StringToGenomeLocIteratorAdapter(Iterator it ) { - this(it,FORMAT.GATK); + public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator it ) { + this(genomeLocParser,it,FORMAT.GATK); } public boolean hasNext() { @@ -81,8 +84,8 @@ public class StringToGenomeLocIteratorAdapter implements Iterator { public GenomeLoc next() { - if ( myFormat == FORMAT.GATK ) return GenomeLocParser.parseGenomeInterval( it.next() ); - return BedParser.parseLocation( it.next() ); + if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeInterval( it.next() ); + return BedParser.parseLocation( genomeLocParser,it.next() ); } public void remove() { diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index d28911d11..41bfcc872 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -48,7 +48,7 @@ public abstract class TraversalEngine,Provide /** our log, which we want to capture anything from this 
class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - private GenomeAnalysisEngine engine; + protected GenomeAnalysisEngine engine; /** * Gets the named traversal type associated with the given traversal. @@ -74,7 +74,7 @@ public abstract class TraversalEngine,Provide public void printProgress(Shard shard,GenomeLoc loc) { // A bypass is inserted here for unit testing. // TODO: print metrics outside of the traversal engine to more easily handle cumulative stats. - ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics(); + ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics(); cumulativeMetrics.incrementMetrics(shard.getReadMetrics()); printProgress(loc, cumulativeMetrics, false); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 2740eae46..89ff688a7 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -59,12 +59,12 @@ public class TraverseDuplicates extends TraversalEngine readsAtLoc(final SAMRecord read, PushbackIterator iter) { - GenomeLoc site = GenomeLocParser.createGenomeLoc(read); + GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); ArrayList l = new ArrayList(); l.add(read); for (SAMRecord read2 : iter) { - GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2); + GenomeLoc site2 = engine.getGenomeLocParser().createGenomeLoc(read2); // the next read starts too late if (site2.getStart() != site.getStart()) { @@ -114,7 +114,7 @@ public class TraverseDuplicates extends TraversalEngine findDuplicateReads(SAMRecord read, Set> readSets ) { if ( read.getReadPairedFlag() ) { // paired - final GenomeLoc readMateLoc = 
GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); + final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); for (List reads : readSets) { SAMRecord key = reads.get(0); @@ -123,7 +123,7 @@ public class TraverseDuplicates extends TraversalEngine Adding read to dups list: %s %d %s vs. %s", read, reads.size(), readMateLoc, keyMateLoc)); @@ -176,7 +176,7 @@ public class TraverseDuplicates extends TraversalEngine> readSets = uniqueReadSets(readsAtLoc(read, iter)); if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size())); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 4dbf158d6..240176f2f 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -57,7 +57,7 @@ public class TraverseLoci extends TraversalEngine,Locu // if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions // associated with the current site), we need to update the location. The updated location still starts // at the current genomic position, but it has to span the length of the longest deletion (if any). 
- location = GenomeLocParser.setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength()); + location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength()); // it is possible that the new expanded location spans the current shard boundary; the next method ensures // that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 4cb5b4949..670676b48 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -102,7 +102,7 @@ public class TraverseReads extends TraversalEngine,Read sum = walker.reduce(x, sum); } - GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()); + GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart()); printProgress(dataProvider.getShard(),locus); } return sum; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index d2941c637..ab4bf068b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -225,7 +225,7 @@ public class VariantAnnotator extends RodWalker { vcfWriter.add(annotatedVC, ref.getBase()); } else { // check to see if the buffered context is different (in location) this context - if ( indelBufferContext != null && ! 
VariantContextUtils.getLocation(indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(annotatedVCs.iterator().next())) ) { + if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) { for ( VariantContext annotatedVC : indelBufferContext ) vcfWriter.add(annotatedVC, ref.getBase()); indelBufferContext = annotatedVCs; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 7186e8036..e00e7c01b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -165,10 +165,10 @@ public class ProduceBeagleInputWalker extends RodWalker { } public void writeBeagleOutput(VariantContext preferredVC, VariantContext otherVC, boolean isValidationSite, double prior) { - GenomeLoc currentLoc = VariantContextUtils.getLocation(preferredVC); + GenomeLoc currentLoc = VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC); beagleWriter.print(String.format("%s:%d ",currentLoc.getContig(),currentLoc.getStart())); if ( beagleGenotypesWriter != null ) { - beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(preferredVC).toString())); + beagleGenotypesWriter.print(String.format("%s ",VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),preferredVC).toString())); } for ( Allele allele : preferredVC.getAlleles() ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 21a0f4241..2cf640317 100755 --- 
a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -100,10 +100,12 @@ public class CallableLociWalker extends LocusWalker, Gen } // otherwise, merge them else { - sum = GenomeLocParser.setStop(sum, value.first.getStop()); + sum = getToolkit().getGenomeLocParser().setStop(sum, value.first.getStop()); fasta.append(value.second); } return sum; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index 5d43a67ac..a5a74ffa4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -2,13 +2,16 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; public class ClusteredSnps { + private GenomeLocParser genomeLocParser; private int window = 10; private int snpThreshold = 3; - public ClusteredSnps(int snpThreshold, int window) { + public ClusteredSnps(GenomeLocParser genomeLocParser,int snpThreshold, int window) { + this.genomeLocParser = genomeLocParser; this.window = window; this.snpThreshold = snpThreshold; if ( window < 1 || snpThreshold < 1 ) @@ -29,7 +32,7 @@ public class ClusteredSnps { throw new UserException.BadInput("The clustered SNPs filter does not work in the presence of non-variant records; see the documentation for more details"); // find the nth variant - GenomeLoc left = VariantContextUtils.getLocation(variants[i].getVariantContext()); + GenomeLoc left = VariantContextUtils.getLocation(genomeLocParser,variants[i].getVariantContext()); GenomeLoc right = null; int snpsSeen 
= 1; @@ -37,7 +40,7 @@ public class ClusteredSnps { while ( ++currentIndex < variants.length ) { if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) { if ( ++snpsSeen == snpThreshold ) { - right = VariantContextUtils.getLocation(variants[currentIndex].getVariantContext()); + right = VariantContextUtils.getLocation(genomeLocParser,variants[currentIndex].getVariantContext()); break; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 31a617f68..3ddb0d2da 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -117,7 +117,7 @@ public class VariantFiltrationWalker extends RodWalker { public void initialize() { if ( clusterWindow > 0 ) - clusteredSNPs = new ClusteredSnps(clusterSize, clusterWindow); + clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow); filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS); genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); @@ -188,7 +188,7 @@ public class VariantFiltrationWalker extends RodWalker { Set filters = new LinkedHashSet(g.getFilters()); for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) { - if ( VariantContextUtils.match(vc, g, exp) ) + if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, g, exp) ) filters.add(exp.name); } genotypes.put(genotype.getKey(), new Genotype(genotype.getKey(), g.getAlleles(), g.getNegLog10PError(), filters, g.getAttributes(), g.genotypesArePhased())); @@ -211,7 +211,7 @@ public class VariantFiltrationWalker extends RodWalker { filters.add(CLUSTERED_SNP_FILTER_NAME); for ( 
VariantContextUtils.JexlVCMatchExp exp : filterExps ) { - if ( VariantContextUtils.match(vc, exp) ) + if ( VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, exp) ) filters.add(exp.name); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index c98f0639a..e9fb6e875 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -133,7 +133,7 @@ public class BatchedCallsMerger extends LocusWalker imp } // merge the variant contexts - return VariantContextUtils.simpleMerge(calls, ref.getBase()); + return VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), calls, ref.getBase()); } public static AlignmentContext filterForSamples(ReadBackedPileup pileup, Set samples) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index 25ba13061..dd83fe7bc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.contexts.*; import java.util.*; public class SimpleIndelCalculationModel extends GenotypeCalculationModel { + private final GenomeLocParser genomeLocParser; private int MIN_COVERAGE = 6; private double MIN_FRACTION = 0.3; @@ -20,7 +21,9 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel { // the previous normal event context // private Map cachedContext; - protected SimpleIndelCalculationModel() {} + protected SimpleIndelCalculationModel(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; + } private int totalIndels = 
0; private int totalCoverage = 0; @@ -70,7 +73,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel { if ( bestEvent.charAt(0) == '-' ) { alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) ); alleles.add( Allele.create(bestEvent.substring(1), true )); - loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1); + loc = genomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-1); } else throw new ReviewedStingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index a253cb792..bed012c44 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -239,7 +239,9 @@ public class IndelGenotyperV2Walker extends ReadWalker { FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; try { - refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq")); + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); } catch (IOException e) { throw new UserException.CouldNotReadInputFile(new File(RefseqFileName), "Write failed", e); } @@ -257,7 +259,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { int nNorm = 0; int nTum = 0; - for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) { + for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) { List tags = rid.getTags() ; if ( tags.isEmpty() && call_somatic ) throw new UserException.BadInput("In somatic 
mode all input bam files must be tagged as either 'normal' or 'tumor'. Untagged file: "+ @@ -297,12 +299,12 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( ! GENOTYPE_NOT_SORTED && IntervalUtils.isIntervalFile(genotypeIntervalsFile)) { // prepare to read intervals one-by-one, as needed (assuming they are sorted). - genotypeIntervals = new IntervalFileMergingIterator( + genotypeIntervals = new IntervalFileMergingIterator(getToolkit().getGenomeLocParser(), new java.io.File(genotypeIntervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); } else { // read in the whole list of intervals for cleaning - GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals( - IntervalUtils.parseIntervalArguments(Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY); + GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(), + IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(genotypeIntervalsFile),true), IntervalMergingRule.OVERLAPPING_ONLY); genotypeIntervals = locs.iterator(); } currentGenotypeInterval = genotypeIntervals.hasNext() ? 
genotypeIntervals.next() : null; @@ -310,7 +312,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { } - location = GenomeLocParser.createGenomeLoc(0,1); + location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1); // List> readGroupSets = getToolkit().getMergedReadGroupsByReaders(); // List> sampleSets = getToolkit().getSamplesByReaders(); @@ -387,8 +389,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { currentPosition = read.getAlignmentStart(); refName = new String(read.getReferenceName()); - location = GenomeLocParser.setContig(location,refName); - contigLength = GenomeLocParser.getContigInfo(refName).getSequenceLength(); + location = getToolkit().getGenomeLocParser().createGenomeLoc(refName,location.getStart(),location.getStop()); + contigLength = getToolkit().getGenomeLocParser().getContigInfo(refName).getSequenceLength(); outOfContigUserWarned = false; normal_context.clear(); // reset coverage window; this will also set reference position to 0 @@ -543,7 +545,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { } long move_to = adjustedPosition; - for ( long pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) { + for ( int pos = normal_context.getStart() ; pos < Math.min(adjustedPosition,normal_context.getStop()+1) ; pos++ ) { if ( normal_context.indelsAt(pos).size() == 0 ) continue; // no indels @@ -579,8 +581,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right: if ( right > normal_context.getStop() ) right = normal_context.getStop(); - location = GenomeLocParser.setStart(location,pos); - location = GenomeLocParser.setStop(location,pos); // retrieve annotation data + location = getToolkit().getGenomeLocParser().setStart(location,pos); + location = 
getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data if ( normalCall.isCall() ) { normalCallsMade++; @@ -692,7 +694,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( DEBUG ) System.out.println("DEBUG>> Emitting in somatic mode up to "+position+" force shift="+force+" current window="+tumor_context.getStart()+"-"+tumor_context.getStop()); - for ( long pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) { + for ( int pos = tumor_context.getStart() ; pos < Math.min(adjustedPosition,tumor_context.getStop()+1) ; pos++ ) { if ( tumor_context.indelsAt(pos).size() == 0 ) continue; // no indels in tumor @@ -735,8 +737,8 @@ public class IndelGenotyperV2Walker extends ReadWalker { if ( right > tumor_context.getStop() ) right = tumor_context.getStop(); // if indel is too close to the end of the window but we need to emit anyway (force-shift), adjust right - location = GenomeLocParser.setStart(location,pos); - location = GenomeLocParser.setStop(location,pos); // retrieve annotation data + location = getToolkit().getGenomeLocParser().setStart(location,pos); + location = getToolkit().getGenomeLocParser().setStop(location,pos); // retrieve annotation data if ( tumorCall.isCall() ) { tumorCallsMade++; @@ -1395,13 +1397,13 @@ public class IndelGenotyperV2Walker extends ReadWalker { class WindowContext implements IndelListener { private Set reads; - private long start=0; // where the window starts on the ref, 1-based + private int start=0; // where the window starts on the ref, 1-based private CircularArray< List< IndelVariant > > indels; private List emptyIndelList = new ArrayList(); - public WindowContext(long start, int length) { + public WindowContext(int start, int length) { this.start = start; indels = new CircularArray< List >(length); // reads = new LinkedList(); @@ -1412,13 +1414,13 @@ public class IndelGenotyperV2Walker extends ReadWalker { * * @return */ - public long 
getStart() { return start; } + public int getStart() { return start; } /** Returns 1-based reference stop position (inclusive) of the interval this object keeps context for. * * @return */ - public long getStop() { return start + indels.length() - 1; } + public int getStop() { return start + indels.length() - 1; } /** Resets reference start position to 0 and clears the context. * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 72136dd3a..3b062c2e1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -211,9 +211,9 @@ public class IndelRealigner extends ReadWalker { for (String fileOrInterval : intervalsFile.split(";")) { // if it's a file, add items to raw interval list if (IntervalUtils.isIntervalFile(fileOrInterval)) { - merger.add(new IntervalFileMergingIterator( new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); + merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); } else { - rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval)); + rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeInterval(fileOrInterval)); } } if ( ! 
rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator()); @@ -221,7 +221,7 @@ public class IndelRealigner extends ReadWalker { intervals = merger; } else { // read in the whole list of intervals for cleaning - GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY); + GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(),IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY); intervals = locs.iterator(); } currentInterval = intervals.hasNext() ? intervals.next() : null; @@ -239,9 +239,9 @@ public class IndelRealigner extends ReadWalker { nwayWriters = new HashMap(); - for ( SAMReaderID rid : getToolkit().getDataSource().getReaderIDs() ) { + for ( SAMReaderID rid : getToolkit().getReadsDataSource().getReaderIDs() ) { - String fName = getToolkit().getDataSource().getSAMFile(rid).getName(); + String fName = getToolkit().getReadsDataSource().getSAMFile(rid).getName(); int pos ; if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM"); @@ -383,10 +383,10 @@ public class IndelRealigner extends ReadWalker { return 0; } - GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(read); // hack to get around unmapped reads having screwy locations if ( readLoc.getStop() == 0 ) - readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart()); + readLoc = getToolkit().getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart()); if ( readLoc.isBefore(currentInterval) || 
ReadUtils.is454Read(read) ) { // TODO -- it would be nice if we could use indels from 454 reads as alternate consenses @@ -1414,7 +1414,7 @@ public class IndelRealigner extends ReadWalker { } } - private static class ReadBin { + private class ReadBin { private final ArrayList reads = new ArrayList(); private byte[] reference = null; @@ -1426,11 +1426,11 @@ public class IndelRealigner extends ReadWalker { // This can happen if e.g. there's a large known indel with no overlapping reads. public void add(SAMRecord read) { - GenomeLoc locForRead = GenomeLocParser.createGenomeLoc(read); + GenomeLoc locForRead = getToolkit().getGenomeLocParser().createGenomeLoc(read); if ( loc == null ) loc = locForRead; else if ( locForRead.getStop() > loc.getStop() ) - loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), locForRead.getStop()); + loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), loc.getStart(), locForRead.getStop()); reads.add(read); } @@ -1441,9 +1441,9 @@ public class IndelRealigner extends ReadWalker { // set up the reference if we haven't done so yet if ( reference == null ) { // first, pad the reference to handle deletions in narrow windows (e.g. 
those with only 1 read) - long padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1); - long padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); - loc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), padLeft, padRight); + int padLeft = Math.max(loc.getStart()-REFERENCE_PADDING, 1); + int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); + loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight); reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); StringUtil.toUpperCase(reference); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 6a39f36b6..8dc69eeaa 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -92,7 +92,7 @@ public class RealignerTargetCreator extends RodWalker pointEvents = new ArrayList(); + private ArrayList pointEvents = new ArrayList(); - public Event(GenomeLoc loc, long furthestStopPos, EVENT_TYPE type) { + public Event(GenomeLoc loc, int furthestStopPos, EVENT_TYPE type) { this.loc = loc; this.furthestStopPos = furthestStopPos; this.type = type; @@ -254,9 +254,9 @@ public class RealignerTargetCreator extends RodWalker 0 ) { - long lastPosition = pointEvents.get(pointEvents.size()-1); + int lastPosition = pointEvents.get(pointEvents.size()-1); if ( newPosition - lastPosition < windowSize ) { eventStopPos = Math.max(eventStopPos, newPosition); furthestStopPos = e.furthestStopPos; @@ -272,7 +272,7 @@ public class RealignerTargetCreator extends RodWalker= 0 && eventStopPos - eventStartPos < maxIntervalSize; + 
return getToolkit().getGenomeLocParser().validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize; } public String toString() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java index 8455bfda1..b1313f5f4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergePhasedSegregatingAlternateAllelesVCFWriter.java @@ -33,6 +33,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFWriter; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -44,6 +45,8 @@ import java.util.*; public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWriter { private VCFWriter innerWriter; + private GenomeLocParser genomeLocParser; + private ReferenceSequenceFile referenceFileForMNPmerging; private int maxGenomicDistanceForMNP; @@ -64,8 +67,9 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite // Should we call innerWriter.close() in close() private boolean takeOwnershipOfInner; - public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) { + public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int 
maxGenomicDistanceForMNP, String singleSample, boolean emitOnlyMergedRecords, Logger logger, boolean takeOwnershipOfInner, boolean trackAltAlleleStats) { this.innerWriter = innerWriter; + this.genomeLocParser = genomeLocParser; this.referenceFileForMNPmerging = new IndexedFastaSequenceFile(referenceFile); this.maxGenomicDistanceForMNP = maxGenomicDistanceForMNP; this.useSingleSample = singleSample; @@ -83,8 +87,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite this.takeOwnershipOfInner = takeOwnershipOfInner; } - public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) { - this(innerWriter, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics + public MergePhasedSegregatingAlternateAllelesVCFWriter(VCFWriter innerWriter, GenomeLocParser genomeLocParser, File referenceFile, int maxGenomicDistanceForMNP, Logger logger) { + this(innerWriter, genomeLocParser, referenceFile, maxGenomicDistanceForMNP, null, false, logger, false, false); // by default: consider all samples, emit all records, don't own inner, don't keep track of alt allele statistics } public void writeHeader(VCFHeader header) { @@ -113,7 +117,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite return; } - logger.debug("Next VC input = " + VariantContextUtils.getLocation(vc)); + logger.debug("Next VC input = " + VariantContextUtils.getLocation(genomeLocParser,vc)); boolean curVcIsNotFiltered = vc.isNotFiltered(); if (vcfrWaitingToMerge == null) { @@ -123,20 +127,20 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite throw new ReviewedStingException("filteredVcfrList should be empty if not waiting to merge a vc!"); if (curVcIsNotFiltered) { // still need to wait before can release vc - 
logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(vc)); + logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser,vc)); vcfrWaitingToMerge = new VCFRecord(vc, refBase, false); } else if (!emitOnlyMergedRecords) { // filtered records are never merged - logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(vc)); + logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser,vc)); innerWriter.add(vc, refBase); } } else { // waiting to merge vcfrWaitingToMerge - logger.debug("Waiting to merge " + VariantContextUtils.getLocation(vcfrWaitingToMerge.vc)); + logger.debug("Waiting to merge " + VariantContextUtils.getLocation(genomeLocParser,vcfrWaitingToMerge.vc)); if (!curVcIsNotFiltered) { if (!emitOnlyMergedRecords) { // filtered records are never merged - logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(vc)); + logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser,vc)); filteredVcfrList.add(new VCFRecord(vc, refBase, false)); } } @@ -164,7 +168,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite boolean mergedRecords = false; if (mergeDistanceInRange) { numRecordsWithinDistance++; - VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging); + VariantContext mergedVc = VariantContextUtils.mergeIntoMNP(genomeLocParser,vcfrWaitingToMerge.vc, vc, referenceFileForMNPmerging); if (mergedVc != null) { mergedRecords = true; vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true); @@ -209,8 +213,8 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite return numMergedRecords; } - public static int minDistance(VariantContext vc1, VariantContext vc2) { - return VariantContextUtils.getLocation(vc1).minDistance(VariantContextUtils.getLocation(vc2)); + public int minDistance(VariantContext 
vc1, VariantContext vc2) { + return VariantContextUtils.getLocation(genomeLocParser,vc1).minDistance(VariantContextUtils.getLocation(genomeLocParser,vc2)); } /** @@ -354,10 +358,10 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite if (!VariantContextUtils.alleleSegregationIsKnown(gt1, gt2)) { aas.segregationUnknown++; - logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2)); + logger.debug("Unknown segregation of alleles [not phased] for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2)); } else if (gt1.isHomRef() || gt2.isHomRef()) { - logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2)); + logger.debug("gt1.isHomRef() || gt2.isHomRef() for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2)); aas.eitherNotVariant++; } else { // BOTH gt1 and gt2 have at least one variant allele (so either hets, or homozygous variant): @@ -386,7 +390,7 @@ public class MergePhasedSegregatingAlternateAllelesVCFWriter implements VCFWrite // Check MNPs vs. 
CHets: if (containsRefAllele(site1Alleles) && containsRefAllele(site2Alleles)) { - logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(vc1) + ", " + VariantContextUtils.getLocation(vc2)); + logger.debug("HET-HET for " + samp + " at " + VariantContextUtils.getLocation(genomeLocParser,vc1) + ", " + VariantContextUtils.getLocation(genomeLocParser,vc2)); if (logger.isDebugEnabled() && !(gt1.isHet() && gt2.isHet())) throw new ReviewedStingException("Since !gt1.isHomRef() && !gt2.isHomRef(), yet both have ref alleles, they BOTH must be hets!"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index 5d941ec17..20fc4cf76 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -78,7 +78,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker don't take control of writer, since didn't create it: - vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics); + vcMergerWriter = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, useSingleSample, emitOnlyMergedRecords, logger, false, !disablePrintAlternateAlleleStatistics); writer = null; // so it can't be accessed directly [i.e., not through vcMergerWriter] // setup the header fields: diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 
0bf2d3cb9..5d0aca9c9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -138,7 +138,7 @@ public class ReadBackedPhasingWalker extends RodWalker use ALL samples, false <-> emit all records, false <-> don't track the statistics of alternate alleles being merged: - writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer, getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false); + writer = new MergePhasedSegregatingAlternateAllelesVCFWriter(writer,getToolkit().getGenomeLocParser(),getToolkit().getArguments().referenceFile, maxGenomicDistanceForMNP, null, false, logger, writer != origWriter, false); /* Due to discardIrrelevantPhasedSites(), the startDistance spanned by [partiallyPhasedSites.peek(), unphasedSiteQueue.peek()] is <= cacheWindow Due to processQueue(), the startDistance spanned by [unphasedSiteQueue.peek(), mostDownstreamLocusReached] is <= cacheWindow @@ -197,7 +197,7 @@ public class ReadBackedPhasingWalker extends RodWalker= 0; i--) { + SAMSequenceDictionary dictionary = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(); + SAMSequenceRecord contig = dictionary.getSequence(cur.getContig()); + if(cur.getStop() < contig.getSequenceLength()) + cur = getToolkit().getGenomeLocParser().incPos(cur,1); + else + cur = getToolkit().getGenomeLocParser().createGenomeLoc(dictionary.getSequence(contig.getSequenceIndex()+1).getSequenceName(),1,1); + out.printf("%s: skipped%n", cur); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index 72ba37830..bc68be592 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -87,11 +87,11 @@ public class ValidatingPileupWalker extends LocusWalker { logger.info("Loading SNP mask... "); ReferenceOrderedData snp_mask; if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) { - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser()); CloseableIterator iter = builder.createInstanceOfTrack(DbSNPCodec.class,"snp_mask",new java.io.File(SNP_MASK)).getIterator(); - snpMaskIterator = new SeekableRODIterator(iter); + snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter); } else { // TODO: fix me when Plink is back @@ -142,8 +142,8 @@ public class PickSequenomProbes extends RodWalker { if ( ! haveMaskForWindow ) { String contig = context.getLocation().getContig(); - long offset = context.getLocation().getStart(); - long true_offset = offset - 200; + int offset = context.getLocation().getStart(); + int true_offset = offset - 200; // we have variant; let's load all the snps falling into the current window and prepare the mask array. // we need to do it only once per window, regardless of how many vcs we may have at this location! 
@@ -152,7 +152,7 @@ public class PickSequenomProbes extends RodWalker { for ( int i = 0 ; i < 401; i++ ) maskFlags[i] = 0; - RODRecordList snpList = snpMaskIterator.seekForward(GenomeLocParser.createGenomeLoc(contig,offset-200,offset+200)); + RODRecordList snpList = snpMaskIterator.seekForward(getToolkit().getGenomeLocParser().createGenomeLoc(contig,offset-200,offset+200)); if ( snpList != null && snpList.size() != 0 ) { Iterator snpsInWindow = snpList.iterator(); int i = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 5bc147725..3c1e1ce4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -639,7 +639,7 @@ public class VariantEvalWalker extends RodWalker implements Tr else if ( group.requiresNovel() && vcKnown ) return false; - if ( group.selectExp != null && ! VariantContextUtils.match(vc, group.selectExp) ) + if ( group.selectExp != null && ! 
VariantContextUtils.match(getToolkit().getGenomeLocParser(),vc, group.selectExp) ) return false; // nothing invalidated our membership in this set diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index def39ba3c..c82c18c09 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -178,7 +178,7 @@ public class GenerateVariantClustersWalker extends RodWalker { vc = VariantContextUtils.reverseComplement(vc); } - vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length)); + vc = VariantContextUtils.modifyLocation(vc, getToolkit().getGenomeLocParser().createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length)); VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false); if ( originalVC.isSNP() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index e8e57986c..8addc10d8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker { if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) { //System.out.printf("%s%n",sub.toString()); for ( 
VariantContextUtils.JexlVCMatchExp jexl : jexls ) { - if ( !VariantContextUtils.match(sub, jexl) ) { + if ( !VariantContextUtils.match(getToolkit().getGenomeLocParser(),sub, jexl) ) { return 0; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java index 87b82c7c8..60078cbfb 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java @@ -56,7 +56,7 @@ public class CreateTiTvTrack extends RodWalker { window.update(VariantContextUtils.isTransition(vc)); if ( window.getTiTv() != null ) { - writer.writeData(VariantContextUtils.getLocation(vc),window.getTiTv()); + writer.writeData(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),vc),window.getTiTv()); } return window; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java index dd9fab657..34b08a089 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java @@ -287,7 +287,7 @@ public class DSBWalkerV3 extends ReadWalker { } - private void shiftWindows(long pos) { + private void shiftWindows(int pos) { // we shift windows when there is a read that does not fit into the current window. // the position, to which the shift is performed, is the first position such that the new read // can be accomodated. 
Hence we can safely slide up to pos, only discarding reads that go out of scope - @@ -332,7 +332,7 @@ public class DSBWalkerV3 extends ReadWalker { purgeSignal(pos); purgeControl(pos); - currentWindow = GenomeLocParser.createGenomeLoc(currentWindow.getContigIndex(),pos,pos+WINDOW_SIZE-1); + currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(currentWindow.getContig(),pos,pos+WINDOW_SIZE-1); } @Override @@ -349,7 +349,8 @@ public class DSBWalkerV3 extends ReadWalker { controlReadGroups = readGroupSets.get(1); // System.out.println(controlReadGroups.size()+" read groups in control"); - currentWindow = GenomeLocParser.createGenomeLoc(0,1,WINDOW_SIZE); + String sequenceName = getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequence(0).getSequenceName(); + currentWindow = getToolkit().getGenomeLocParser().createGenomeLoc(sequenceName,1,WINDOW_SIZE); readsInSignalWindow = new LinkedList(); readsInControlWindow = new LinkedList(); @@ -366,7 +367,7 @@ public class DSBWalkerV3 extends ReadWalker { if ( read.getReferenceIndex() > currentWindow.getContigIndex() ) { printRegion(); // print all we had on the previous contig - currentWindow = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(), + currentWindow = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(), read.getAlignmentStart(), read.getAlignmentStart()+WINDOW_SIZE-1); currentContig = read.getReferenceName(); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java index a2ff62a72..ea904d26e 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java @@ -95,7 +95,7 @@ public class DesignFileGeneratorWalker extends RodWalker { } for ( Map.Entry additionalGenes : currentBedFeatures.entrySet() 
) { - GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd()); + GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(additionalGenes.getValue().getChr(),additionalGenes.getValue().getStart(),additionalGenes.getValue().getEnd()); if ( interval.overlapsP(entryLoc) && ! additionalGenes.getValue().getName().equals("") && ! intervalBuffer.get(interval).geneNames.contains(additionalGenes.getKey()+"_"+additionalGenes.getValue().getName())) { @@ -142,7 +142,7 @@ public class DesignFileGeneratorWalker extends RodWalker { } for ( Map.Entry entry : currentBedFeatures.entrySet() ) { - GenomeLoc entryLoc = GenomeLocParser.createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd()); + GenomeLoc entryLoc = getToolkit().getGenomeLocParser().createGenomeLoc(entry.getValue().getChr(),entry.getValue().getStart(),entry.getValue().getEnd()); if ( entryLoc.isBefore(ref.getLocus()) ) { currentBedFeatures.remove(entry.getKey()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index 4e682956e..c07f2f168 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -39,7 +39,9 @@ public class IndelAnnotator extends RodWalker { FeatureSource refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first; try { - refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq")); + refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(), + getToolkit().getGenomeLocParser(), + new FeatureToGATKFeatureIterator(getToolkit().getGenomeLocParser(),refseq.iterator(),"refseq")); } catch 
(IOException e) { throw new UserException.CouldNotReadInputFile(RefseqFileName, e); } @@ -128,14 +130,14 @@ public class IndelAnnotator extends RodWalker { } else { if ( RefSeqFeature.isCoding(ann) ) { //b.append(annIntron); // not in exon, but within the coding region = intron - GenomeLoc ig = GenomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd()); + GenomeLoc ig = getToolkit().getGenomeLocParser().createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd()); GenomeLoc cl = t.getCodingLocation(); GenomeLoc g = t.getLocation(); boolean spliceSiteDisruption = false; for (GenomeLoc exon : t.getExons()) { - GenomeLoc expandedExon = GenomeLocParser.createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6); + GenomeLoc expandedExon = getToolkit().getGenomeLocParser().createGenomeLoc(exon.getContig(), exon.getStart() - 6, exon.getStop() + 6); if (ig.overlapsP(expandedExon)) { spliceSiteDisruption = true; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java index 5bc71c8cf..b90a152dc 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java @@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -71,7 +72,7 @@ public class IndelDBRateWalker extends RodWalker private void finalUpdate(OverlapTabulator tab) { while ( ! 
evalContexts.isEmpty() ) { - tab.update(emptyOverlapTable()); + tab.update(emptyOverlapTable(getToolkit().getGenomeLocParser())); } } @@ -119,25 +120,25 @@ public class IndelDBRateWalker extends RodWalker public OverlapTable getOverlapTable(ReferenceContext ref) { // step 1: check that the eval queue is non-empty and that we are outside the window - if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) { + if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) { return null; } // step 2: discard all comp variations which come before the window - while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(compContexts.get(0)).isBefore(ref.getLocus()) && - VariantContextUtils.getLocation(compContexts.get(0)).distance(ref.getLocus()) > indelWindow) { + while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).isBefore(ref.getLocus()) && + VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(0)).distance(ref.getLocus()) > indelWindow) { compContexts.remove(0); } // step 3: see if there are any contexts left; if so then they must be within the window if ( ! 
compContexts.isEmpty() ) { return nonEmptyOverlapTable(ref); } else { - return emptyOverlapTable(); + return emptyOverlapTable(ref.getGenomeLocParser()); } } - public OverlapTable emptyOverlapTable() { + public OverlapTable emptyOverlapTable(GenomeLocParser genomeLocParser) { // only eval, no comp - OverlapTable ot = new OverlapTable(); + OverlapTable ot = new OverlapTable(genomeLocParser); ot.setEvalSizeAndType(evalContexts.get(0)); return ot; } @@ -145,17 +146,17 @@ public class IndelDBRateWalker extends RodWalker public OverlapTable nonEmptyOverlapTable(ReferenceContext ref) { if ( vcfWriter != null ) { int i = 0; - while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).isBefore(VariantContextUtils.getLocation(evalContexts.get(0)))) { + while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).isBefore(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0)))) { vcfWriter.add(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]); i++; } vcfWriter.add(evalContexts.get(0), ref.getBase()); - while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).distance(VariantContextUtils.getLocation(evalContexts.get(0))) <= indelWindow) { + while ( i < compContexts.size() && VariantContextUtils.getLocation(ref.getGenomeLocParser(),compContexts.get(i)).distance(VariantContextUtils.getLocation(ref.getGenomeLocParser(),evalContexts.get(0))) <= indelWindow) { vcfWriter.add(compContexts.get(i), compContexts.get(i).getReference().getBases()[0]); i++; } } - OverlapTable ot = new OverlapTable(); + OverlapTable ot = new OverlapTable(ref.getGenomeLocParser()); ot.setCompOverlaps(compContexts.size()); ot.setDistances(compContexts,evalContexts.get(0), indelWindow); return ot; @@ -164,13 +165,15 @@ public class IndelDBRateWalker extends RodWalker } class OverlapTable { + private GenomeLocParser genomeLocParser; private int numOverlaps; 
private ExpandingArrayList distances; // currently unused private int evalSize; private boolean isDeletion; - public OverlapTable() { + public OverlapTable(GenomeLocParser genomeLocParser) { + this.genomeLocParser = genomeLocParser; numOverlaps = 0; } @@ -187,8 +190,8 @@ class OverlapTable { public void setDistances(List comps, VariantContext eval, int winsize) { distances = new ExpandingArrayList(); for ( VariantContext comp : comps ) { - if ( VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)) <= winsize ) { - distances.add(VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval))); + if ( VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval)) <= winsize ) { + distances.add(VariantContextUtils.getLocation(genomeLocParser,comp).distance(VariantContextUtils.getLocation(genomeLocParser,eval))); } } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java index a3296c9ce..302c071d5 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelErrorRateWalker.java @@ -143,7 +143,7 @@ public class IndelErrorRateWalker extends LocusWalker { // System.out.println("Non countable indel event at "+pileup.getLocation()); countableIndelBuffer.clear(); coverageBuffer.clear(); // we do not want to count observations (read bases) around non-countable indel as well - skipToLoc = GenomeLocParser.createGenomeLoc(pileup.getLocation().getContigIndex(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1); + skipToLoc = ref.getGenomeLocParser().createGenomeLoc(pileup.getLocation().getContig(),pileup.getLocation().getStop()+pileup.getMaxDeletionLength()+MIN_DISTANCE+1); // System.out.println("Skip to "+skipToLoc); } 
else { // pileup does not contain too many indels, we need to store them in the buffer and count them later, diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java index 1def72a21..109fa6b40 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MarkIntervals.java @@ -83,7 +83,7 @@ public class MarkIntervals extends RodWalker { try { for ( String line : new XReadLines(locs, true) ) { String parts[] = line.split(":"); - badSites.add(GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]))); + badSites.add(getToolkit().getGenomeLocParser().createGenomeLoc(parts[0], Integer.valueOf(parts[1]))); } } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(locs, e); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index 89487ca16..66969da79 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -237,7 +237,7 @@ public class MendelianViolationClassifier extends LocusWalker { +public class + ReadQualityScoreWalker extends ReadWalker { @Output protected PrintStream out; @Argument(fullName = "inputQualityFile", shortName = "if", doc = "Input quality score file generated by NeighborhoodQualityWalker", required = true) @@ -98,7 +99,7 @@ public class ReadQualityScoreWalker extends ReadWalker // BUGBUG: This assumes reads will be sorted by start location float sumNeighborhoodQuality = 0.0f; int numLines = 0; - GenomeLoc readLoc = GenomeLocParser.createGenomeLoc( read ); + GenomeLoc readLoc = getToolkit().getGenomeLocParser().createGenomeLoc( read 
); if( readLoc.size() > 0 ) { // only calculate mean NQS if the read has a well formed GenomeLoc, if not NQS will be zero try { if( line == null ) { @@ -106,12 +107,12 @@ public class ReadQualityScoreWalker extends ReadWalker if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file is empty" ); } } String[] halves = line.split( " ", 2 ); - GenomeLoc curLoc = GenomeLocParser.parseGenomeLoc( halves[0] ); + GenomeLoc curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] ); while( curLoc.isBefore( readLoc ) ) { // Loop until the beginning of the read line = inputReader.readLine(); if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. Can't find beginning of read: " + readLoc ); } halves = line.split( " ", 2 ); - curLoc = GenomeLocParser.parseGenomeLoc( halves[0] ); + curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] ); } // now we have skipped ahead in the input file to where this read starts logger.debug( "Starting: " + curLoc + ", read: " + readLoc + "\t size: " + readLoc.size() ); @@ -124,7 +125,7 @@ public class ReadQualityScoreWalker extends ReadWalker line = inputReader.readLine(); if( line == null ) { throw new UserException.MalformedFile(new File(inputQualityFile), "Input file doesn't encompass all reads. 
Can't find end of read: " + readLoc ); } halves = line.split( " ", 2 ); - curLoc = GenomeLocParser.parseGenomeLoc( halves[0] ); + curLoc = getToolkit().getGenomeLocParser().parseGenomeLoc( halves[0] ); } // now we have parsed the input file up to where the read ends // reset back to the mark in order to parse the next read in the next call to the reduce function diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java index 03777e8ec..a81136d8c 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/RealignedReadCounter.java @@ -60,7 +60,7 @@ public class RealignedReadCounter extends ReadWalker { public void initialize() { // prepare to read intervals one-by-one, as needed (assuming they are sorted). - intervals = new IntervalFileMergingIterator( new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); + intervals = new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); currentInterval = intervals.hasNext() ? 
intervals.next() : null; } @@ -69,10 +69,10 @@ public class RealignedReadCounter extends ReadWalker { return 0; } - GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read); + GenomeLoc readLoc = ref.getGenomeLocParser().createGenomeLoc(read); // hack to get around unmapped reads having screwy locations if ( readLoc.getStop() == 0 ) - readLoc = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), readLoc.getStart(), readLoc.getStart()); + readLoc = ref.getGenomeLocParser().createGenomeLoc(readLoc.getContig(), readLoc.getStart(), readLoc.getStart()); if ( readLoc.isBefore(currentInterval) || ReadUtils.is454Read(read) ) return 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java index 2a10f1ad5..40adde697 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java @@ -130,7 +130,7 @@ public class TestReadFishingWalker extends ReadWalker { else throw new ReviewedStingException("Invalid indel type: " + type); - aligners.put(GenomeLocParser.createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration())); + aligners.put(getToolkit().getGenomeLocParser().createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration())); if(++numAlignersCreated % 100 == 0) out.printf("Created %d aligners in %dms%n",++numAlignersCreated,System.currentTimeMillis()-startTime); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java index 45768476d..5c06c188f 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java @@ -28,9 +28,9 
@@ public class ValidateRODForReads extends ReadWalker { @Override public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { if (tracker != null) { - Map> mapping = tracker.getContigOffsetMapping(); - for (Map.Entry> entry : mapping.entrySet()) { - GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey()); + Map> mapping = tracker.getContigOffsetMapping(); + for (Map.Entry> entry : mapping.entrySet()) { + GenomeLoc location = ref.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),entry.getKey()); if (!map.containsKey(location)) { map.put(location,0); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java index 09e48833e..6cbe725c7 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java @@ -113,7 +113,7 @@ public class CombineDuplicatesWalker extends DuplicateWalker, SA // out.printf("Combining Read %s%n", read.format()); // } // - combinedRead = DupUtils.combineDuplicates(reads, MAX_QUALITY_SCORE); + combinedRead = DupUtils.combineDuplicates(getToolkit().getGenomeLocParser(),reads, MAX_QUALITY_SCORE); //out.printf(" => into %s%n", combinedRead.format()); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 2c6bf42ce..e08d51e2a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -468,10 +468,10 @@ public class UnifiedGenotyperEngine { // if a read is too long for 
the reference context, extend the context (being sure not to extend past the end of the chromosome) if ( record.getAlignmentEnd() > refContext.getWindow().getStop() ) { - GenomeLoc window = GenomeLocParser.createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength())); + GenomeLoc window = refContext.getGenomeLocParser().createGenomeLoc(refContext.getLocus().getContig(), refContext.getWindow().getStart(), Math.min(record.getAlignmentEnd(), referenceReader.getSequenceDictionary().getSequence(refContext.getLocus().getContig()).getSequenceLength())); byte[] bases = referenceReader.getSubsequenceAt(window.getContig(), window.getStart(), window.getStop()).getBases(); StringUtil.toUpperCase(bases); - refContext = new ReferenceContext(refContext.getLocus(), window, bases); + refContext = new ReferenceContext(refContext.getGenomeLocParser(),refContext.getLocus(), window, bases); } BitSet mismatches = AlignmentUtils.mismatchesInRefWindow(record, refContext, UAC.MAX_MISMATCHES, MISMATCH_WINDOW_SIZE); diff --git a/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java b/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java index c08b459dc..d16c50fac 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java +++ b/java/src/org/broadinstitute/sting/playground/tools/RemapAlignments.java @@ -136,10 +136,10 @@ public class RemapAlignments extends CommandLineProgram { } h.setSequenceDictionary(reference.getSequenceDictionary()); - GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary()); - + GenomeLocParser genomeLocParser = new GenomeLocParser(reference.getSequenceDictionary()); + map = new GenomicMap(10000); - map.read(MAP_FILE); + map.read(genomeLocParser,MAP_FILE); System.out.println("Map loaded successfully: "+map.size()+" contigs"); 
diff --git a/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java b/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java index c22105ed5..49c136874 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java +++ b/java/src/org/broadinstitute/sting/playground/utils/GenomicMap.java @@ -40,12 +40,7 @@ import java.util.List; import java.util.Map; import java.util.Set; -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; +import net.sf.samtools.*; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.*; @@ -95,7 +90,7 @@ public class GenomicMap implements Iterable 0 && segments.get(segments.size()-1).getStop()+1 == newSegment.getStart() && segments.get(segments.size()-1).getContigIndex() == newSegment.getContigIndex()) @@ -408,7 +404,7 @@ public class GenomicMap implements Iterable, Cloneable, Serializable * start and stop position, and (optionally) the contig name */ protected final int contigIndex; - protected final long start; - protected final long stop; + protected final int start; + protected final int stop; protected final String contigName; // -------------------------------------------------------------------------------------------------------------- @@ -32,93 +32,25 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable // constructors // // -------------------------------------------------------------------------------------------------------------- - /*GenomeLoc( int contigIndex, final long start, final long stop ) { - MAX_CONTIG = Integer.MAX_VALUE; - if (start < 0) { throw new StingException("Bad start position " + start);} - if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position - - 
this.contigIndex = contigIndex; - this.start = start; - this.contigName = null; // we just don't know - this.stop = stop == -1 ? start : stop; - }*/ protected GenomeLoc(final SAMRecord read) { this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()); } - protected GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) { + protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) { this.contigName = contig; this.contigIndex = contigIndex; this.start = start; this.stop = stop; } - /*GenomeLoc( final int contig, final long pos ) { - this(contig, pos, pos ); - } - */ - protected GenomeLoc( final GenomeLoc toCopy ) { - this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() ); - } - - /** - * Returns true if we have a specified series of locations to process AND we are past the last - * location in the list. It means that, in a serial processing of the genome, that we are done. - * - * @param curr Current genome Location - * @param locs a list of genomic locations - * @return true if we are past the last location to process + * Return a new GenomeLoc at this same position. + * @return A GenomeLoc with the same contents as the current loc. 
*/ - public static boolean pastFinalLocation(GenomeLoc curr, List locs) { - return (locs.size() > 0 && curr.isPast(locs.get(locs.size() - 1))); - } - - /** - * A key function that returns true if the proposed GenomeLoc curr is within the list of - * locations we are processing in this TraversalEngine - * - * @param curr the current location - * @param locs a list of genomic locations - * @return true if we should process GenomeLoc curr, otherwise false - */ - public static boolean inLocations(GenomeLoc curr, ArrayList locs) { - if ( locs.size() == 0 ) { - return true; - } else { - for ( GenomeLoc loc : locs ) { - //System.out.printf(" Overlap %s vs. %s => %b%n", loc, curr, loc.overlapsP(curr)); - if (loc.overlapsP(curr)) - return true; - } - return false; - } - } - - public static void removePastLocs(GenomeLoc curr, List locs) { - while ( !locs.isEmpty() && curr.isPast(locs.get(0)) ) { - //System.out.println("At: " + curr + ", removing: " + locs.get(0)); - locs.remove(0); - } - } - - public static boolean overlapswithSortedLocsP(GenomeLoc curr, List locs, boolean returnTrueIfEmpty) { - if ( locs.isEmpty() ) - return returnTrueIfEmpty; - - // skip loci before intervals begin - if ( curr.contigIndex < locs.get(0).contigIndex ) - return false; - - for ( GenomeLoc loc : locs ) { - //System.out.printf(" Overlap %s vs. 
%s => %b%n", loc, curr, loc.overlapsP(curr)); - if ( loc.overlapsP(curr) ) - return true; - if ( curr.compareTo(loc) < 0 ) - return false; - } - return false; + @Override + public GenomeLoc clone() { + return new GenomeLoc(getContig(),getContigIndex(),getStart(),getStop()); } // @@ -129,8 +61,8 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable } public final int getContigIndex() { return this.contigIndex; } - public final long getStart() { return this.start; } - public final long getStop() { return this.stop; } + public final int getStart() { return this.start; } + public final int getStop() { return this.stop; } public final String toString() { if ( throughEndOfContigP() && atBeginningOfContigP() ) return getContig(); @@ -139,13 +71,8 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable else return String.format("%s:%d-%d", getContig(), getStart(), getStop()); } - - public final boolean isUnmapped() { return this.contigIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX; } - public final boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; } - public final boolean atBeginningOfContigP() { return this.start == 1; } - - - public final boolean isSingleBP() { return stop == start; } + private boolean throughEndOfContigP() { return this.stop == Integer.MAX_VALUE; } + private boolean atBeginningOfContigP() { return this.start == 1; } public final boolean disjointP(GenomeLoc that) { return this.contigIndex != that.contigIndex || this.start > that.stop || that.start > this.stop; @@ -187,15 +114,6 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop(); } - /** - * Returns true if this GenomeLoc contains the start position of GenomeLoc that, on the same contig - * @param start - * @return - */ - public final boolean containsStartPosition(long start) { - return getStart() <= start && start <= getStop(); - } - 
public final boolean onSameContig(GenomeLoc that) { return (this.contigIndex == that.contigIndex); } @@ -215,26 +133,26 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable return this.compareTo(left) > -1 && this.compareTo(right) < 1; } + /** + * Tests whether this contig is completely before contig 'that'. + * @param that Contig to test against. + * @return true if this contig ends before 'that' starts; false if this is completely after or overlaps 'that'. + */ public final boolean isBefore( GenomeLoc that ) { int comparison = this.compareContigs(that); return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() )); } + /** + * Tests whether this contig is completely after contig 'that'. + * @param that Contig to test against. + * @return true if this contig starts after 'that' ends; false if this is completely before or overlaps 'that'. + */ public final boolean isPast( GenomeLoc that ) { int comparison = this.compareContigs(that); return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() )); } - public final boolean startsBefore( GenomeLoc that ) { - int comparison = this.compareContigs(that); - return ( comparison == -1 || ( comparison == 0 && this.getStart() < that.getStart() )); - } - - public final boolean startsAfter( GenomeLoc that ) { - int comparison = this.compareContigs(that); - return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStart() )); - } - // Return the minimum distance between any pair of bases in this and that GenomeLocs: public final int minDistance( final GenomeLoc that ) { if (!this.onSameContig(that)) @@ -281,15 +199,6 @@ public class GenomeLoc implements Comparable, Cloneable, Serializable } - /** - * Return a new GenomeLoc at this same position. - * @return A GenomeLoc with the same contents as the current loc. 
- */ - @Override - public GenomeLoc clone() { - return new GenomeLoc(this); - } - /** * conpare this genomeLoc's contig to another genome loc * @param that the genome loc to compare contigs with diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 6336e113f..8ea108fe1 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -57,24 +57,33 @@ import org.broadinstitute.sting.utils.text.XReadLines; public class GenomeLocParser { private static Logger logger = Logger.getLogger(GenomeLocParser.class); - //private static final Pattern mPattern = Pattern.compile("([\\p{Print}&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3 - - // -------------------------------------------------------------------------------------------------------------- // // Ugly global variable defining the optional ordering of contig elements // // -------------------------------------------------------------------------------------------------------------- //public static Map refContigOrdering = null; - protected static SAMSequenceDictionary contigInfo = null; + protected SAMSequenceDictionary contigInfo = null; /** - * do we have a contig ordering setup? 
- * - * @return true if the contig order is setup + * set our internal reference contig order + * @param refFile the reference file */ - public static boolean hasKnownContigOrdering() { - return contigInfo != null; + public GenomeLocParser(final ReferenceSequenceFile refFile) { + this(refFile.getSequenceDictionary()); + } + + public GenomeLocParser(SAMSequenceDictionary seqDict) { + if (seqDict == null) { // we couldn't load the reference dictionary + //logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs"); + throw new UserException.CommandLineException("Failed to load reference dictionary"); + } else if (contigInfo == null) { + contigInfo = seqDict; + logger.debug(String.format("Prepared reference sequence contig dictionary")); + for (SAMSequenceRecord contig : seqDict.getSequences()) { + logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); + } + } } /** @@ -84,7 +93,7 @@ public class GenomeLocParser { * * @return the sam sequence record */ - public static SAMSequenceRecord getContigInfo(final String contig) { + public SAMSequenceRecord getContigInfo(final String contig) { return contigInfo.getSequence(contig); } @@ -96,53 +105,13 @@ public class GenomeLocParser { * * @return the contig index, -1 if not found */ - public static int getContigIndex(final String contig, boolean exceptionOut) { + public int getContigIndex(final String contig, boolean exceptionOut) { if (contigInfo.getSequenceIndex(contig) == -1 && exceptionOut) throw new UserException.CommandLineException(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); return contigInfo.getSequenceIndex(contig); } - /** - * set our internal reference contig order - * - * @param refFile the reference file - * - * @return true if we were successful - */ - public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) { - return 
setupRefContigOrdering(refFile.getSequenceDictionary()); - } - - /** - * setup our internal reference contig order - * - * @param seqDict the sequence dictionary - * - * @return true if we were successful - */ - public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) { - if (seqDict == null) { // we couldn't load the reference dictionary - //logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs"); - throw new UserException.CommandLineException("Failed to load reference dictionary"); - } else if (contigInfo == null) { - contigInfo = seqDict; - logger.debug(String.format("Prepared reference sequence contig dictionary")); - for (SAMSequenceRecord contig : seqDict.getSequences()) { - logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength())); - } - } - return true; - } - - /** - * A package-protected method that can be used by the test system to reset the sequence dictionary - * being used. Use this method sparingly. 
- */ - static void clearRefContigOrdering() { - contigInfo = null; - } - /** * parse a genome interval, from a location string * @@ -155,7 +124,7 @@ public class GenomeLocParser { * */ - public static GenomeLoc parseGenomeInterval(final String str) { + public GenomeLoc parseGenomeInterval(final String str) { GenomeLoc ret = parseGenomeLoc(str); exceptionOnInvalidGenomeLocBounds(ret); return ret; @@ -173,13 +142,13 @@ public class GenomeLocParser { * @return a GenomeLoc representing the String * */ - public static GenomeLoc parseGenomeLoc(final String str) { + public GenomeLoc parseGenomeLoc(final String str) { // 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' //System.out.printf("Parsing location '%s'%n", str); String contig = null; - long start = 1; - long stop = -1; + int start = 1; + int stop = -1; final int colonIndex = str.indexOf(":"); if(colonIndex == -1) { @@ -210,7 +179,7 @@ public class GenomeLocParser { if (!isContigValid(contig)) throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); - if (stop == Integer.MAX_VALUE && hasKnownContigOrdering()) + if (stop == Integer.MAX_VALUE) // lookup the actually stop position! stop = getContigInfo(contig).getSequenceLength(); @@ -228,7 +197,7 @@ public class GenomeLocParser { * Parses a number like 1,000,000 into a long. * @param pos */ - private static long parsePosition(final String pos) { + private int parsePosition(final String pos) { //String x = pos.replaceAll(",", ""); - this was replaced because it uses regexps //System.out.println("Parsing position: '" + pos + "'"); if(pos.indexOf('-') != -1) { @@ -244,13 +213,13 @@ public class GenomeLocParser { continue; } else if(c < '0' || c > '9') { throw new NumberFormatException("Position: '" + pos + "' contains invalid chars." 
); - } else { + } else { buffer.append(c); } } - return Long.parseLong(buffer.toString()); + return Integer.parseInt(buffer.toString()); } else { - return Long.parseLong(pos); + return Integer.parseInt(pos); } } @@ -263,7 +232,7 @@ public class GenomeLocParser { * * @return the list of merged locations */ - public static List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { + public List mergeIntervalLocations(final List raw, IntervalMergingRule rule) { if (raw.size() <= 1) return raw; else { @@ -292,7 +261,7 @@ public class GenomeLocParser { * * @return True if the contig is valid. False otherwise. */ - private static boolean isContigValid(String contig) { + private boolean isContigValid(String contig) { int contigIndex = contigInfo.getSequenceIndex(contig); return contigIndex >= 0 && contigIndex < contigInfo.size(); } @@ -309,7 +278,7 @@ public class GenomeLocParser { * Validation: only checks that contig is valid * start/stop could be anything */ - public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) { + public GenomeLoc parseGenomeLoc(final String contig, int start, int stop) { if (!isContigValid(contig)) throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); return new GenomeLoc(contig, getContigIndex(contig,true), start, stop); @@ -327,7 +296,7 @@ public class GenomeLocParser { * @param allowEmptyIntervalList if false empty interval lists will return null * @return List List of Genome Locs that have been parsed from file */ - public static List intervalFileToList(final String file_name, boolean allowEmptyIntervalList) { + public List intervalFileToList(final String file_name, boolean allowEmptyIntervalList) { // try to open file File inputFile = new File(file_name); @@ -344,7 +313,7 @@ public class GenomeLocParser { // case: BED file if 
(file_name.toUpperCase().endsWith(".BED")) { - BedParser parser = new BedParser(inputFile); + BedParser parser = new BedParser(this,inputFile); return parser.getLocations(); } @@ -393,22 +362,8 @@ public class GenomeLocParser { * * @return the string that represents that contig name */ - private static String getSequenceNameFromIndex(int contigIndex) { - return GenomeLocParser.contigInfo.getSequence(contigIndex).getSequenceName(); - } - - /** - * create a genome loc, given the contig name, start, and stop - * - * @param contig the contig name - * @param start the starting position - * @param stop the stop position - * - * @return a new genome loc - */ - public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), start, stop)); + private String getSequenceNameFromIndex(int contigIndex) { + return contigInfo.getSequence(contigIndex).getSequenceName(); } /** @@ -420,31 +375,21 @@ public class GenomeLocParser { * * @return a new genome loc - but don't exception out if it is invalid */ - public static GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final long start, final long stop) { - checkSetup(); - return new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,false), start, stop); + public GenomeLoc createPotentiallyInvalidGenomeLoc(String contig, final int start, final int stop) { + return new GenomeLoc(contig, getContigIndex(contig,false), start, stop); } /** - * create a genome loc, given the contig index, start, and stop + * create a genome loc, given the contig name, start, and stop * - * @param contigIndex the contig index - * @param start the start position - * @param stop the stop position + * @param contig the contig name + * @param start the starting position + * @param stop the stop position * * @return a new genome loc */ - public static GenomeLoc createGenomeLoc(int contigIndex, final 
long start, final long stop) { - checkSetup(); - if (start < 0) { - throw new ReviewedStingException("Bad start position " + start); - } - if (stop < -1) { - throw new ReviewedStingException("Bad stop position " + stop); - } // a negative -1 indicates it's not a meaningful end position - - - return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop); + public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) { + return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), start, stop)); } /** @@ -454,25 +399,10 @@ public class GenomeLocParser { * * @return */ - public static GenomeLoc createGenomeLoc(final SAMRecord read) { - checkSetup(); + public GenomeLoc createGenomeLoc(final SAMRecord read) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd())); } - - /** - * create a new genome loc, given the contig position, and a single position - * - * @param contig the contig name - * @param pos the postion - * - * @return a genome loc representing a single base at the specified postion on the contig - */ - public static GenomeLoc createGenomeLoc(final int contig, final long pos) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos)); - } - /** * create a new genome loc, given the contig name, and a single position * @@ -481,14 +411,8 @@ public class GenomeLocParser { * * @return a genome loc representing a single base at the specified postion on the contig */ - public static GenomeLoc createGenomeLoc(final String contig, final long pos) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), pos, pos)); - } - - public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) { - checkSetup(); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(toCopy.getContig(), 
toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop())); + public GenomeLoc createGenomeLoc(final String contig, final int pos) { + return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig,true), pos, pos)); } /** @@ -505,7 +429,7 @@ public class GenomeLocParser { * @return the genome loc if it's valid, otherwise we throw an exception * */ - private static GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) { + private GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) { if (toReturn.getStart() < 0) { throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0"); } @@ -534,7 +458,7 @@ public class GenomeLocParser { * * @param locus Locus to verify. */ - private static void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) { + private void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) { int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength(); if(locus.getStart() > contigSize) throw new ReviewedStingException(String.format("GenomeLoc is invalid: locus start %d is after the end of contig %s",locus.getStart(),locus.getContig())); @@ -554,8 +478,7 @@ public class GenomeLocParser { * * performs interval-style validation: contig is valid and atart and stop less than the end */ - public static boolean validGenomeLoc(GenomeLoc loc) { - checkSetup(); + public boolean validGenomeLoc(GenomeLoc loc) { // quick check before we get the contig size, is the contig number valid if ((loc.getContigIndex() < 0) || // the contig index has to be positive (loc.getContigIndex() >= contigInfo.getSequences().size())) // the contig must be in the integer range of contigs) @@ -583,9 +506,8 @@ public class GenomeLocParser { * * performs interval-style validation: contig is valid and atart and stop less than the end */ - public static boolean validGenomeLoc(String contig, long start, long stop) { - checkSetup(); - return validGenomeLoc(new GenomeLoc(contig, 
GenomeLocParser.getContigIndex(contig, false), start, stop)); + public boolean validGenomeLoc(String contig, int start, int stop) { + return validGenomeLoc(new GenomeLoc(contig, getContigIndex(contig, false), start, stop)); } @@ -600,58 +522,11 @@ public class GenomeLocParser { * * performs interval-style validation: contig is valid and atart and stop less than the end */ - public static boolean validGenomeLoc(int contigIndex, long start, long stop) { - checkSetup(); + public boolean validGenomeLoc(int contigIndex, int start, int stop) { if (contigIndex < 0 || contigIndex >= contigInfo.size()) return false; return validGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop)); } - /** - * Move this Genome loc to the next contig, with a start - * and stop of 1. - * - * @return true if we are not out of contigs, otherwise false if we're - * at the end of the genome (no more contigs to jump to). - */ - public static GenomeLoc toNextContig(GenomeLoc current) { - if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) { - return null; - } else - return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1)); - } - - /** - * create a new genome loc, given an old location and a new contig - * - * @param loc the old location - * @param contig the new contig to set - * - * @return a new genome loc with an updated contig name and index - */ - public static GenomeLoc setContig(GenomeLoc loc, String contig) { - checkSetup(); - - int index = -1; - if ((index = contigInfo.getSequenceIndex(contig)) < 0) { - throw new ReviewedStingException("Contig name ( " + contig + " ) not in the set sequence dictionary."); - } - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, index, loc.start, loc.getStop())); - } - - /** - * Sets contig index. 
UNSAFE since it 1) does NOT update contig name; 2) does not validate the index - * - * @param contig - */ - public static GenomeLoc setContigIndex(GenomeLoc loc, int contig) { - checkSetup(); - if ((contig >= GenomeLocParser.contigInfo.getSequences().size()) || (contig < 0)) { - throw new ReviewedStingException("Contig index ( " + contig + " ) is not in the sequence dictionary set."); - } - return exceptionOnInvalidGenomeLoc(new GenomeLoc(GenomeLocParser.contigInfo.getSequence(contig).getSequenceName(), contig, loc.start, loc.getStop())); - } - - /** * create a new genome loc from an existing loc, with a new start position * Note that this function will NOT explicitly check the ending offset, in case someone wants to @@ -662,8 +537,7 @@ public class GenomeLocParser { * * @return the newly created genome loc */ - public static GenomeLoc setStart(GenomeLoc loc, long start) { - checkSetup(); + public GenomeLoc setStart(GenomeLoc loc, int start) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop())); } @@ -677,8 +551,7 @@ public class GenomeLocParser { * * @return */ - public static GenomeLoc setStop(GenomeLoc loc, long stop) { - checkSetup(); + public GenomeLoc setStop(GenomeLoc loc, int stop) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop)); } @@ -689,7 +562,7 @@ public class GenomeLocParser { * * @return a new genome loc */ - public static GenomeLoc incPos(GenomeLoc loc) { + public GenomeLoc incPos(GenomeLoc loc) { return incPos(loc, 1); } @@ -701,41 +574,19 @@ public class GenomeLocParser { * * @return a new genome loc */ - public static GenomeLoc incPos(GenomeLoc loc, long by) { + public GenomeLoc incPos(GenomeLoc loc, int by) { return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by)); } /** - * create a new genome loc with an incremented position - * - * @param loc the location - * - * 
@return a new genome loc + * Creates a GenomeLoc that spans the entire contig. + * @param contigName Name of the contig. + * @return A locus spanning the entire contig. */ - public static GenomeLoc nextLoc(GenomeLoc loc) { - return incPos(loc); - } - - /** check to make sure that we've setup the contig information */ - private static void checkSetup() { - if (contigInfo == null) { - throw new ReviewedStingException("The GenomeLocParser hasn't been setup with a contig sequence yet"); - } - } - - /** - * compare two contig names, in the current context - * - * @param firstContig - * @param secondContig - * - * @return - */ - public static int compareContigs(String firstContig, String secondContig) { - checkSetup(); - Integer ref1 = GenomeLocParser.getContigIndex(firstContig,true); - Integer ref2 = GenomeLocParser.getContigIndex(secondContig,true); - return ref1.compareTo(ref2); - - } + public GenomeLoc createOverEntireContig(String contigName) { + SAMSequenceRecord contig = contigInfo.getSequence(contigName); + if(contig == null) + throw new ReviewedStingException("Unable to find contig named " + contigName); + return exceptionOnInvalidGenomeLoc(new GenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength())); + } } diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java index fcb73f1ce..eba412e0b 100755 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java @@ -38,25 +38,36 @@ import java.util.*; public class GenomeLocSortedSet extends AbstractSet { private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class); + private GenomeLocParser genomeLocParser; + // our private storage for the GenomeLoc's private List mArray = new ArrayList(); /** default constructor */ - public GenomeLocSortedSet() { + public GenomeLocSortedSet(GenomeLocParser parser) { + this.genomeLocParser
= parser; } - public GenomeLocSortedSet(GenomeLoc e) { - this(); + public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) { + this(parser); add(e); } - public GenomeLocSortedSet(Collection l) { - this(); + public GenomeLocSortedSet(GenomeLocParser parser,Collection l) { + this(parser); for ( GenomeLoc e : l ) add(e); } + /** + * Gets the GenomeLocParser used to create this sorted set. + * @return The parser. Will never be null. + */ + public GenomeLocParser getGenomeLocParser() { + return genomeLocParser; + } + /** * get an iterator over this collection * @@ -201,7 +212,7 @@ public class GenomeLocSortedSet extends AbstractSet { logger.debug("removeRegions operation: i = " + i); } - return GenomeLocSortedSet.createSetFromList(good); + return createSetFromList(genomeLocParser,good); } private static final List EMPTY_LIST = new ArrayList(); @@ -221,8 +232,8 @@ public class GenomeLocSortedSet extends AbstractSet { * |------| + |--------| * */ - GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); - GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); + GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1); + GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop()); if (after.getStop() - after.getStart() >= 0) { l.add(after); } @@ -255,9 +266,9 @@ public class GenomeLocSortedSet extends AbstractSet { GenomeLoc n; if (e.getStart() < g.getStart()) { - n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop()); + n = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop()); } else { - n = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1); + n = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1); } // replace g with the new region @@ -283,9 +294,10 @@ public class GenomeLocSortedSet 
extends AbstractSet { * @return the GenomeLocSet of all references sequences as GenomeLoc's */ public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) { - GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(); + GenomeLocParser parser = new GenomeLocParser(dict); + GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet(parser); for (SAMSequenceRecord record : dict.getSequences()) { - returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength())); + returnSortedSet.add(parser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength())); } return returnSortedSet; } @@ -297,8 +309,8 @@ public class GenomeLocSortedSet extends AbstractSet { * * @return the sorted genome loc list */ - public static GenomeLocSortedSet createSetFromList(List locs) { - GenomeLocSortedSet set = new GenomeLocSortedSet(); + public static GenomeLocSortedSet createSetFromList(GenomeLocParser parser,List locs) { + GenomeLocSortedSet set = new GenomeLocSortedSet(parser); set.addAll(locs); return set; } @@ -307,13 +319,13 @@ public class GenomeLocSortedSet extends AbstractSet { /** * return a deep copy of this collection. * - * @return a new GenomeLocSortedSet, indentical to the current GenomeLocSortedSet. + * @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet. 
*/ public GenomeLocSortedSet clone() { - GenomeLocSortedSet ret = new GenomeLocSortedSet(); + GenomeLocSortedSet ret = new GenomeLocSortedSet(genomeLocParser); for (GenomeLoc loc : this.mArray) { // ensure a deep copy - ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop())); + ret.mArray.add(genomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStop())); } return ret; } diff --git a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index 31ffdaf81..b7ca11f9c 100644 --- a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -20,6 +20,8 @@ public class BedParser { // the buffered reader input private final BufferedReader mIn; + private GenomeLocParser genomeLocParser; + // our array of locations private List mLocations; @@ -28,7 +30,8 @@ public class BedParser { * * @param fl */ - public BedParser(File fl) { + public BedParser(GenomeLocParser genomeLocParser,File fl) { + this.genomeLocParser = genomeLocParser; try { mIn = new BufferedReader(new FileReader(fl)); } catch (FileNotFoundException e) { @@ -57,7 +60,7 @@ public class BedParser { List locArray = new ArrayList(); try { while ((line = mIn.readLine()) != null) { - locArray.add(parseLocation(line)); + locArray.add(parseLocation(genomeLocParser,line)); } } catch (IOException e) { throw new UserException.MalformedFile("Unable to parse line in BED file."); @@ -71,7 +74,7 @@ public class BedParser { * @param line the line, as a string * @return a parsed genome loc */ - public static GenomeLoc parseLocation(String line) { + public static GenomeLoc parseLocation(GenomeLocParser genomeLocParser,String line) { String contig; int start; int stop; @@ -85,7 +88,7 @@ public class BedParser { } // we currently drop the rest of the bed record, which can contain names, scores, etc - return 
GenomeLocParser.createGenomeLoc(contig, start, stop); + return genomeLocParser.createGenomeLoc(contig, start, stop); } diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index d2baa46c1..41de5ef96 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -45,7 +45,7 @@ public class DupUtils { } } - public static SAMRecord combineDuplicates(List duplicates, int maxQScore) { + public static SAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List duplicates, int maxQScore) { if ( duplicates.size() == 0 ) return null; @@ -63,7 +63,7 @@ public class DupUtils { //for ( SAMRecord read : duplicates ) { // System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]); //} - Pair baseAndQual = combineBaseProbs(duplicates, i, maxQScore); + Pair baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore); bases[i] = baseAndQual.getFirst(); quals[i] = baseAndQual.getSecond(); } @@ -114,8 +114,8 @@ public class DupUtils { System.out.printf("%n"); } - private static Pair combineBaseProbs(List duplicates, int readOffset, int maxQScore) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(duplicates.get(0)); + private static Pair combineBaseProbs(GenomeLocParser genomeLocParser,List duplicates, int readOffset, int maxQScore) { + GenomeLoc loc = genomeLocParser.createGenomeLoc(duplicates.get(0)); ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, duplicates, readOffset); final boolean debug = false; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index c01596ae7..85c505935 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -114,11 +114,10 @@ public 
class Haplotype { // Create location for all haplotypes - long startLoc = ref.getWindow().getStart() + startIdxInReference; - long stopLoc = startLoc + haplotypeSize-1; + int startLoc = ref.getWindow().getStart() + startIdxInReference; + int stopLoc = startLoc + haplotypeSize-1; - GenomeLoc locus = GenomeLocParser.createGenomeLoc(ref.getLocus().getContigIndex(),startLoc, - stopLoc); + GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); for (Allele a : vc.getAlleles()) { diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index ee5064289..e722ac196 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; @@ -56,17 +57,17 @@ public class IntervalFileMergingIterator implements Iterator { private IntervalMergingRule myRule; private File myFile; - public IntervalFileMergingIterator(File f, IntervalMergingRule rule) { + public IntervalFileMergingIterator(GenomeLocParser genomeLocParser,File f, IntervalMergingRule rule) { myFile = f; try { XReadLines reader = new XReadLines(f); if (f.getName().toUpperCase().endsWith(".BED")) { - it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( reader.iterator(), + it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(), StringToGenomeLocIteratorAdapter.FORMAT.BED ) ) ; } else { - it = new PushbackIterator( new 
StringToGenomeLocIteratorAdapter( reader.iterator(), + it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(), StringToGenomeLocIteratorAdapter.FORMAT.GATK ) ) ; } } catch ( FileNotFoundException e ) { diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 9f68ac87a..a2967d455 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -29,7 +29,7 @@ public class IntervalUtils { * @param allowEmptyIntervalList If false instead of an empty interval list will return null. * @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used. */ - public static List parseIntervalArguments(List argList, boolean allowEmptyIntervalList) { + public static List parseIntervalArguments(GenomeLocParser parser, List argList, boolean allowEmptyIntervalList) { List rawIntervals = new ArrayList(); // running list of raw GenomeLocs if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to @@ -51,7 +51,7 @@ public class IntervalUtils { // if it's a file, add items to raw interval list if (isIntervalFile(fileOrInterval)) { try { - rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList)); + rawIntervals.addAll(parser.intervalFileToList(fileOrInterval, allowEmptyIntervalList)); } catch (Exception e) { throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e); @@ -60,7 +60,7 @@ public class IntervalUtils { // otherwise treat as an interval -> parse and add to raw interval list else { - rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval)); + rawIntervals.add(parser.parseGenomeInterval(fileOrInterval)); } } } @@ -121,13 
+121,13 @@ public class IntervalUtils { * @param mergingRule A descriptor for the type of merging to perform. * @return A sorted, merged version of the intervals passed in. */ - public static GenomeLocSortedSet sortAndMergeIntervals(List intervals, IntervalMergingRule mergingRule) { + public static GenomeLocSortedSet sortAndMergeIntervals(GenomeLocParser parser, List intervals, IntervalMergingRule mergingRule) { // sort raw interval list Collections.sort(intervals); // now merge raw interval list - intervals = GenomeLocParser.mergeIntervalLocations(intervals, mergingRule); + intervals = parser.mergeIntervalLocations(intervals, mergingRule); - return GenomeLocSortedSet.createSetFromList(intervals); + return GenomeLocSortedSet.createSetFromList(parser,intervals); } /** diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 15583da7a..bfa93e005 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -90,10 +90,6 @@ public class ExtendedEventPileupElement extends PileupElement { public Type getType() { return type; } - public GenomeLoc getLocation() { - return GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),read.getAlignmentStart()+offset, read.getAlignmentStart()+offset+eventLength); - } - // The offset can be negative with insertions at the start of the read, but a valid base does exist at this position with // a valid base quality. The following code attempts to compensate for that.' 
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java index 16293ba62..ce6ca570c 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java @@ -1,9 +1,6 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMRecordIterator; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.*; import java.io.InputStream; import java.io.ByteArrayInputStream; @@ -31,17 +28,23 @@ import org.broadinstitute.sting.utils.GenomeLocParser; */ public class ArtificialSAMFileReader extends SAMFileReader { + /** + * The parser, for GenomeLocs. + */ + private final GenomeLocParser genomeLocParser; + /** * Backing data store of reads. */ - private List reads = null; + private final List reads; /** * Construct an artificial SAM file reader. * @param reads Reads to use as backing data source. */ - public ArtificialSAMFileReader(SAMRecord... reads) { + public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... 
reads) { super( createEmptyInputStream(),true ); + this.genomeLocParser = new GenomeLocParser(sequenceDictionary); this.reads = Arrays.asList(reads); } @@ -50,11 +53,11 @@ public class ArtificialSAMFileReader extends SAMFileReader { */ @Override public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) { - GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end); + GenomeLoc region = genomeLocParser.createGenomeLoc(sequence, start, end); List coveredSubset = new ArrayList(); for( SAMRecord read: reads ) { - GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read); + GenomeLoc readPosition = genomeLocParser.createGenomeLoc(read); if( contained && region.containsP(readPosition) ) coveredSubset.add(read); else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read); } diff --git a/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java b/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java index 1d75e00b1..01f1dfe96 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ComparableSAMRecord.java @@ -32,11 +32,9 @@ import org.broadinstitute.sting.utils.GenomeLocParser; public class ComparableSAMRecord implements Comparable { private SAMRecord record; - private GenomeLoc loc; public ComparableSAMRecord(SAMRecord record) { this.record = record; - this.loc = GenomeLocParser.createGenomeLoc(record); } public SAMRecord getRecord() { @@ -44,8 +42,8 @@ public class ComparableSAMRecord implements Comparable { } public int compareTo(ComparableSAMRecord o) { - // first sort by start position - int comparison = loc.compareTo(o.loc); + // first sort by start position -- will not overflow because both are guaranteed to be positive.
+ int comparison = record.getAlignmentStart() - o.record.getAlignmentStart(); // if the reads have the same start position, we must give a non-zero comparison // (because java Sets often require "consistency with equals") if ( comparison == 0 ) @@ -63,7 +61,7 @@ public class ComparableSAMRecord implements Comparable { return true; ComparableSAMRecord csr = (ComparableSAMRecord)obj; - if ( loc.compareTo(csr.loc) != 0 ) + if(record.getAlignmentStart() != csr.record.getAlignmentStart()) return false; if ( !record.getReadName().equals(csr.getRecord().getReadName()) ) return false; diff --git a/java/test/org/broadinstitute/sting/WalkerTest.java b/java/test/org/broadinstitute/sting/WalkerTest.java index 80562b77a..e8712fcb0 100755 --- a/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/java/test/org/broadinstitute/sting/WalkerTest.java @@ -30,7 +30,6 @@ import org.broad.tribble.index.IndexFactory; import org.broad.tribble.vcf.VCFCodec; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; @@ -311,8 +310,6 @@ public class WalkerTest extends BaseTest { * @return a pair of file and string lists */ private Pair, List> executeTest(String name, List md5s, List tmpFiles, String args, Class expectedException) { - GenomeLocParserTestUtils.clearSequenceDictionary(); - CommandLineGATK instance = new CommandLineGATK(); String[] command; diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java index 146c11231..ffd31c9cd 100644 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java +++ 
b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java @@ -69,16 +69,18 @@ public class VariantJEXLContextUnitTest extends BaseTest { private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH; static SAMFileHeader header; + private static GenomeLocParser genomeLocParser; + @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); try { exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.createExpression(expression)); } catch (Exception e) { Assert.fail("Unable to create expression" + e.getMessage()); } - snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10); + snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10); } @BeforeMethod @@ -144,7 +146,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { List alleles = Arrays.asList(Aref, T); VariantContext vc = new VariantContext("test", snpLoc.getContig(), snpLoc.getStart(), snpLoc.getStop(), alleles); - return new JEXLMap(Arrays.asList(exp),vc); + return new JEXLMap(genomeLocParser,Arrays.asList(exp),vc); } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java index 4665f11af..9807cede4 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/AllLocusViewUnitTest.java @@ -44,16 +44,16 @@ public class AllLocusViewUnitTest extends LocusViewTemplate { // TODO: Should skip over loci not in the given range. 
GenomeLoc firstLoc = range.get(0); GenomeLoc lastLoc = range.get(range.size()-1); - GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop()); + GenomeLoc bounds = genomeLocParser.createGenomeLoc(firstLoc.getContig(),firstLoc.getStart(),lastLoc.getStop()); - for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); + for( int i = bounds.getStart(); i <= bounds.getStop(); i++ ) { + GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i); AlignmentContext locusContext = allLocusView.next(); Assert.assertEquals(locusContext.getLocation(), site, "Locus context location is incorrect"); int expectedReadsAtSite = 0; for( SAMRecord read: reads ) { - if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { + if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) { Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads"); expectedReadsAtSite++; } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java index 625fc22d9..75716eae6 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/CoveredLocusViewUnitTest.java @@ -47,14 +47,14 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { // TODO: Should skip over loci not in the given range. 
GenomeLoc firstLoc = range.get(0); GenomeLoc lastLoc = range.get(range.size()-1); - GenomeLoc bounds = GenomeLocParser.createGenomeLoc(firstLoc.getContigIndex(),firstLoc.getStart(),lastLoc.getStop()); + GenomeLoc bounds = genomeLocParser.createGenomeLoc(firstLoc.getContig(),firstLoc.getStart(),lastLoc.getStop()); - for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) { - GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i); + for( int i = bounds.getStart(); i <= bounds.getStop(); i++ ) { + GenomeLoc site = genomeLocParser.createGenomeLoc("chr1",i); int expectedReadsAtSite = 0; for( SAMRecord read: reads ) { - if( GenomeLocParser.createGenomeLoc(read).containsP(site) ) + if( genomeLocParser.createGenomeLoc(read).containsP(site) ) expectedReadsAtSite++; } @@ -68,7 +68,7 @@ public class CoveredLocusViewUnitTest extends LocusViewTemplate { Assert.assertEquals(locusContext.getReads().size(), expectedReadsAtSite, "Found wrong number of reads at site"); for( SAMRecord read: reads ) { - if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) + if(genomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) Assert.assertTrue(locusContext.getReads().contains(read),"Target locus context does not contain reads"); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java index 26efb0e13..0375e0efd 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java @@ -57,11 +57,15 @@ public class LocusReferenceViewUnitTest extends ReferenceViewTemplate { @Test public void testOverlappingReferenceBases() { - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() 
- 10, sequenceFile.getSequence("chrM").length()))); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), + sequenceFile.getSequence("chrM").length() - 10, + sequenceFile.getSequence("chrM").length()))); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); - byte[] results = view.getReferenceBases(GenomeLocParser.createGenomeLoc(0, sequenceFile.getSequence("chrM").length() - 10, sequenceFile.getSequence("chrM").length() + 9)); + byte[] results = view.getReferenceBases(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), + sequenceFile.getSequence("chrM").length() - 10, + sequenceFile.getSequence("chrM").length() + 9)); System.out.printf("results are %s%n", new String(results)); Assert.assertEquals(results.length, 20); for (int x = 0; x < results.length; x++) { @@ -74,16 +78,16 @@ public class LocusReferenceViewUnitTest extends ReferenceViewTemplate { /** Queries outside the bounds of the shard should result in reference context window trimmed at the shard boundary. 
*/ @Test public void testBoundsFailure() { - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc(0, 1, 50))); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), 1, 50))); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, sequenceFile, null); LocusReferenceView view = new LocusReferenceView(dataProvider); - GenomeLoc locus = GenomeLocParser.createGenomeLoc(0, 50, 51); + GenomeLoc locus = genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(), 50, 51); ReferenceContext rc = view.getReferenceContext(locus); Assert.assertTrue(rc.getLocus().equals(locus)); - Assert.assertTrue(rc.getWindow().equals(GenomeLocParser.createGenomeLoc(0,50))); + Assert.assertTrue(rc.getWindow().equals(genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(),50))); Assert.assertTrue(rc.getBases().length == 1); } @@ -94,10 +98,10 @@ public class LocusReferenceViewUnitTest extends ReferenceViewTemplate { * @param loc */ protected void validateLocation( GenomeLoc loc ) { - Shard shard = new MockLocusShard(Collections.singletonList(loc)); - GenomeLocusIterator shardIterator = new GenomeLocusIterator(loc); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(loc)); + GenomeLocusIterator shardIterator = new GenomeLocusIterator(genomeLocParser,loc); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, loc, null, sequenceFile, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, loc, null, sequenceFile, null); LocusReferenceView 
view = new LocusReferenceView(dataProvider); while (shardIterator.hasNext()) { diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 078978586..e4d478d61 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -16,7 +16,6 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -39,23 +38,23 @@ import java.util.*; public abstract class LocusViewTemplate extends BaseTest { protected static ReferenceSequenceFile sequenceSourceFile = null; + protected GenomeLocParser genomeLocParser = null; @BeforeClass public void setupGenomeLoc() throws FileNotFoundException { - GenomeLocParserTestUtils.clearSequenceDictionary(); sequenceSourceFile = fakeReferenceSequenceFile(); - GenomeLocParser.setupRefContigOrdering(sequenceSourceFile); + genomeLocParser = new GenomeLocParser(sequenceSourceFile); } @Test public void emptyAlignmentContextTest() { SAMRecordIterator iterator = new SAMRecordIterator(); - GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(new SAMDataSource(Collections.emptyList()),Collections.singletonList(shardBounds),Collections.emptyMap()); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); + Shard shard = new LocusShard(new 
SAMDataSource(Collections.emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.emptyMap()); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -67,11 +66,11 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); - GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new MockLocusShard(Collections.singletonList(shardBounds)); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(shardBounds)); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); @@ -83,10 +82,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 1, 5); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new 
MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -97,10 +96,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = 
createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -111,10 +110,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 3, 7); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -125,10 +124,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 1, 10); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - 
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -139,10 +138,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read = buildSAMRecord("chr1", 6, 15); SAMRecordIterator iterator = new SAMRecordIterator(read); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); @@ -154,10 +153,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read2 = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard 
shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -173,10 +172,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read4 = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -192,10 +191,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read4 = buildSAMRecord("chr1", 5, 9); SAMRecordIterator iterator = new 
SAMRecordIterator(read1, read2, read3, read4); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -213,10 +212,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecord read6 = buildSAMRecord("chr1", 6, 10); SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 1, 10))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, 
window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); @@ -241,10 +240,10 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(read01, read02, read03, read04, read05, read06, read07, read08, read09, read10, read11, read12); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chr1", 6, 15))); - WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); - LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), window.getLocus(), window, null, null); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); LocusView view = createView(dataProvider); List expectedReads = new ArrayList(); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java index fe58e166a..9c84c3e18 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewUnitTest.java @@ -52,6 +52,7 @@ import java.util.*; * test out the ReadBasedReferenceOrderedView class */ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; private static int startingChr = 1; private 
static int endingChr = 2; @@ -62,7 +63,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @BeforeMethod @@ -76,15 +77,15 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { for (int x = 1; x < 11; x++) { SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10); } - GenomeLoc start = GenomeLocParser.createGenomeLoc(0, 0, 0); + GenomeLoc start = genomeLocParser.createGenomeLoc(header.getSequenceDictionary().getSequence(0).getSequenceName(), 0, 0); List list = new ArrayList(); - list.add(new RMDDataState(null, new FakePeekingRODIterator(start, "fakeName"))); + list.add(new RMDDataState(null, new FakePeekingRODIterator(genomeLocParser,start, "fakeName"))); ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list)); for (SAMRecord rec : records) { ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec); - Map> map = tracker.getReadOffsetMapping(); - for (Long i : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(); + for (Integer i : map.keySet()) { Assert.assertEquals(map.get(i).size(), 1); } Assert.assertEquals(map.keySet().size(), 10); @@ -96,15 +97,16 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest { class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { + private GenomeLocParser genomeLocParser; // current location private GenomeLoc location; private GATKFeature curROD; private final String name; - public FakePeekingRODIterator(GenomeLoc startingLoc, String name) { + public FakePeekingRODIterator(GenomeLocParser genomeLocParser, GenomeLoc startingLoc, String name) { this.name = 
name; - this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); + this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); } @Override @@ -134,7 +136,7 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { public RODRecordList next() { System.err.println("Next -> " + location); curROD = new ReadMetaDataTrackerUnitTest.FakeRODatum(location, name); - location = GenomeLocParser.createGenomeLoc(location.getContigIndex(), location.getStart() + 1, location.getStop() + 1); + location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1); FakeRODRecordList list = new FakeRODRecordList(); list.add(curROD); return list; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java index bc9b685cf..b00f7ee6a 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadReferenceViewUnitTest.java @@ -70,7 +70,7 @@ public class ReadReferenceViewUnitTest extends ReferenceViewTemplate { final long contigStart = selectedContig.getSequenceLength() - (readLength - overlap - 1); final long contigStop = selectedContig.getSequenceLength() + overlap; - ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,genomeLocParser,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); SAMRecord rec = buildSAMRecord(selectedContig.getSequenceName(),(int)contigStart,(int)contigStop); @@ -98,7 +98,7 @@ public class ReadReferenceViewUnitTest extends ReferenceViewTemplate { protected void 
validateLocation( GenomeLoc loc ) { SAMRecord read = buildSAMRecord( loc.getContig(), (int)loc.getStart(), (int)loc.getStop() ); - ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,null,sequenceFile,null); + ReadShardDataProvider dataProvider = new ReadShardDataProvider(null,genomeLocParser,null,sequenceFile,null); ReadReferenceView view = new ReadReferenceView(dataProvider); ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(loc.getContig(),loc.getStart(),loc.getStop()); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 87aeffbc5..8227435ae 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -43,17 +43,20 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { * Sequence file. 
*/ private static IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; /** * our track builder */ - RMDTrackBuilder builder = new RMDTrackBuilder(); + RMDTrackBuilder builder = null; @BeforeClass public void init() throws FileNotFoundException { // sequence seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); + builder = new RMDTrackBuilder(); + builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser); } /** @@ -61,11 +64,11 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { */ @Test public void testNoBindings() { - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); - LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); + LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10)); Assert.assertEquals(tracker.getAllRods().size(), 0, "The tracker should not have produced any data"); } @@ -76,14 +79,14 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { public void testSingleBinding() { File file = new File(testDir + "TabularDataTest.dat"); RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest",file); - ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(track,false); + ReferenceOrderedDataSource dataSource = new 
ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); - LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); + LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); TableFeature datum = tracker.lookup("tableTest",TableFeature.class); Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect"); @@ -100,17 +103,17 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { RMDTrack track = builder.createInstanceOfTrack(TableCodec.class,"tableTest1",file); - ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(track,false); + ReferenceOrderedDataSource dataSource1 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track,false); RMDTrack track2 = builder.createInstanceOfTrack(TableCodec.class,"tableTest2",file); - ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(track2,false); + ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(seq.getSequenceDictionary(),genomeLocParser,track2,false); - Shard shard = new MockLocusShard(Collections.singletonList(GenomeLocParser.createGenomeLoc("chrM",1,30))); + Shard shard = new 
MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chrM",1,30))); - LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); + LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); TableFeature datum1 = tracker.lookup("tableTest1",TableFeature.class); Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect"); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java index 0bb142ef7..c8bcf559b 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceViewTemplate.java @@ -5,7 +5,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -32,7 +31,8 @@ public abstract class ReferenceViewTemplate extends BaseTest { /** * The fasta, for comparison. 
*/ - protected static IndexedFastaSequenceFile sequenceFile = null; + protected IndexedFastaSequenceFile sequenceFile = null; + protected GenomeLocParser genomeLocParser = null; // // The bulk of sequence retrieval is tested by IndexedFastaSequenceFile, but we'll run a few spot @@ -43,9 +43,8 @@ public abstract class ReferenceViewTemplate extends BaseTest { */ @BeforeClass public void initialize() throws FileNotFoundException { - GenomeLocParserTestUtils.clearSequenceDictionary(); sequenceFile = new IndexedFastaSequenceFile( new File(hg18Reference) ); - GenomeLocParser.setupRefContigOrdering(sequenceFile); + genomeLocParser = new GenomeLocParser(sequenceFile); } /** @@ -53,7 +52,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { */ @Test public void testReferenceStart() { - validateLocation( GenomeLocParser.createGenomeLoc(0,1,25) ); + validateLocation( genomeLocParser.createGenomeLoc(sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName(),1,25) ); } /** @@ -63,9 +62,9 @@ public abstract class ReferenceViewTemplate extends BaseTest { public void testReferenceEnd() { // Test the last 25 bases of the first contig. SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1); - final long contigStart = selectedContig.getSequenceLength() - 24; - final long contigStop = selectedContig.getSequenceLength(); - validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + final int contigStart = selectedContig.getSequenceLength() - 24; + final int contigStop = selectedContig.getSequenceLength(); + validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),contigStart,contigStop) ); } /** @@ -76,7 +75,7 @@ public abstract class ReferenceViewTemplate extends BaseTest { // Test the last 25 bases of the first contig. 
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); - validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),1,25) ); + validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),1,25) ); } @@ -88,9 +87,9 @@ public abstract class ReferenceViewTemplate extends BaseTest { // Test the last 25 bases of the first contig. int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); - final long contigStart = selectedContig.getSequenceLength() - 24; - final long contigStop = selectedContig.getSequenceLength(); - validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) ); + final int contigStart = selectedContig.getSequenceLength() - 24; + final int contigStop = selectedContig.getSequenceLength(); + validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),contigStart,contigStop) ); } protected abstract void validateLocation( GenomeLoc loc ); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java index 88e2ddfd0..638b04a92 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProviderUnitTest.java @@ -36,7 +36,7 @@ public class ShardDataProviderUnitTest extends BaseTest { @BeforeMethod public void createProvider() { - provider = new LocusShardDataProvider( null,null,null,null,null,null ); + provider = new LocusShardDataProvider( null,null,null,null,null,null,null ); } /** diff --git 
a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java index f5e90a8be..8b65f0900 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.List; import java.util.Collections; @@ -38,7 +39,7 @@ import java.util.Collections; * @version 0.1 */ public class MockLocusShard extends LocusShard { - public MockLocusShard(final List intervals) { - super(new SAMDataSource(Collections.emptyList()),intervals,null); + public MockLocusShard(final GenomeLocParser genomeLocParser,final List intervals) { + super(new SAMDataSource(Collections.emptyList(),genomeLocParser),intervals,null); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java index c0149580d..837e3735e 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java @@ -40,30 +40,34 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { private RMDTrack rod = null; + private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; + private GenomeLoc testSite1; private GenomeLoc testSite2; private GenomeLoc testSite3; @BeforeClass public void init() throws 
FileNotFoundException { - File sequenceFile = new File(hg18Reference); - GenomeLocParser.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile)); + seq = new IndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); - testSite1 = GenomeLocParser.createGenomeLoc("chrM",10); - testSite2 = GenomeLocParser.createGenomeLoc("chrM",20); - testSite3 = GenomeLocParser.createGenomeLoc("chrM",30); + testSite1 = genomeLocParser.createGenomeLoc("chrM",10); + testSite2 = genomeLocParser.createGenomeLoc("chrM",20); + testSite3 = genomeLocParser.createGenomeLoc("chrM",30); } @BeforeMethod public void setUp() { File file = new File(testDir + "TabularDataTest.dat"); RMDTrackBuilder builder = new RMDTrackBuilder(); + builder.setSequenceDictionary(seq.getSequenceDictionary(),genomeLocParser); rod = builder.createInstanceOfTrack(TableCodec.class, "tableTest", file); } @Test public void testCreateSingleIterator() { - ResourcePool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ResourcePool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); LocationAwareSeekableRODIterator iterator = (LocationAwareSeekableRODIterator)iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -84,7 +88,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testCreateMultipleIterators() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); LocationAwareSeekableRODIterator iterator1 = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); // Create a new iterator at position 2. 
@@ -134,7 +138,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorConservation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser,rod, false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite1) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); @@ -169,7 +173,7 @@ public class ReferenceOrderedDataPoolUnitTest extends BaseTest { @Test public void testIteratorCreation() { - ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(rod, false); + ReferenceOrderedDataPool iteratorPool = new ReferenceOrderedDataPool(seq.getSequenceDictionary(),genomeLocParser, rod, false); LocationAwareSeekableRODIterator iterator = iteratorPool.iterator( new MappedStreamSegment(testSite3) ); Assert.assertEquals(iteratorPool.numIterators(), 1, "Number of iterators in the pool is incorrect"); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java index 96f21e698..373cb6634 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java @@ -52,6 +52,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { private List readers; private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; /** * This function does the setup of our parser, before each method call. 
@@ -64,7 +65,7 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // sequence seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary()); } /** @@ -88,8 +89,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); // the sharding strat. - SAMDataSource data = new SAMDataSource(readers); - ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); + SAMDataSource data = new SAMDataSource(readers,genomeLocParser); + ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000,genomeLocParser); int count = 0; try { @@ -132,8 +133,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); // the sharding strat. - SAMDataSource data = new SAMDataSource(readers); - ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); + SAMDataSource data = new SAMDataSource(readers,genomeLocParser); + ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000,genomeLocParser); ArrayList readcountPerShard = new ArrayList(); ArrayList readcountPerShard2 = new ArrayList(); @@ -174,8 +175,8 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { count = 0; // the sharding strat. 
- data = new SAMDataSource(readers); - strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); + data = new SAMDataSource(readers,genomeLocParser); + strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000, genomeLocParser); logger.debug("Pile two:"); try { diff --git a/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java b/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java index ffad08047..166eb8a1d 100644 --- a/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/filters/ReadGroupBlackListFilterUnitTest.java @@ -43,8 +43,6 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest { groupRecord.setAttribute("PL", PLATFORM_PREFIX + (((i-1)%2)+1)); groupRecord.setAttribute("PU", PLATFORM_UNIT_PREFIX + (((i-1)%3)+1)); } - - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); } @Test(expectedExceptions=ReviewedStingException.class) diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java index e81c80b96..dfef86941 100755 --- a/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/BoundedReadIteratorUnitTest.java @@ -72,10 +72,6 @@ public class BoundedReadIteratorUnitTest extends BaseTest { @BeforeMethod public void doForEachTest() throws FileNotFoundException { fl = new ArrayList(); - - // sequence - seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); } diff --git 
a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index f01c4869c..7c8cd922b 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -29,10 +29,12 @@ public class LocusIteratorByStateUnitTest extends BaseTest { private static SAMFileHeader header; private LocusIteratorByState li; + private GenomeLocParser genomeLocParser; + @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @Test @@ -61,7 +63,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { List reads = Arrays.asList(before,during,after); // create the iterator by state with the fake reads and fake records - li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes); + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes,genomeLocParser); boolean foundExtendedEventPileup = false; while (li.hasNext()) { @@ -113,7 +115,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { List reads = Arrays.asList(before,during,after); // create the iterator by state with the fake reads and fake records - li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes); + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()),readAttributes,genomeLocParser); boolean foundExtendedEventPileup = false; while (li.hasNext()) { diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java index 1450a975f..2198c461d 100644 --- 
a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerUnitTest.java @@ -56,10 +56,12 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { private static SAMFileHeader header; private Set nameSet; + private GenomeLocParser genomeLocParser; + @BeforeClass public void beforeClass() { header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @BeforeMethod @@ -75,7 +77,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getReadOffsetMapping().keySet()) { + for (Integer x : tracker.getReadOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2); } @@ -89,7 +91,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getReadOffsetMapping().keySet()) { + for (Integer x : tracker.getReadOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1); } @@ -103,8 +105,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping("default"); - for (Long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping("default"); + for (Integer x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), 1); } @@ -117,8 +119,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); - for (Long x : map.keySet()) { + Map> map = 
tracker.getReadOffsetMapping(FakeRODatum.class); + for (Integer x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), 2); } @@ -136,8 +138,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); - for (Long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(FakeRODatum.class); + for (Integer x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), y + 2); } @@ -155,8 +157,8 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - Map> map = tracker.getReadOffsetMapping(Fake2RODatum.class); - for (long x : map.keySet()) { + Map> map = tracker.getReadOffsetMapping(Fake2RODatum.class); + for (int x : map.keySet()) { count++; Assert.assertEquals(map.get(x).size(), 1); } @@ -169,7 +171,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getReadOffsetMapping().keySet()) { + for (Integer x : tracker.getReadOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1); } @@ -182,7 +184,7 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { // count the positions int count = 0; - for (Long x : tracker.getContigOffsetMapping().keySet()) { + for (Integer x : tracker.getContigOffsetMapping().keySet()) { count++; Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1); } @@ -200,9 +202,9 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { */ private ReadMetaDataTracker getRMDT(int incr, Set names, boolean alternateTypes) { SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10); - TreeMap data = new TreeMap(); + TreeMap data = new TreeMap(); for (int x = 0; x < record.getAlignmentEnd(); x += incr) { - GenomeLoc loc = 
GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), record.getAlignmentStart() + x, record.getAlignmentStart() + x); + GenomeLoc loc = genomeLocParser.createGenomeLoc(record.getReferenceName(), record.getAlignmentStart() + x, record.getAlignmentStart() + x); RODMetaDataContainer set = new RODMetaDataContainer(); int cnt = 0; @@ -213,9 +215,9 @@ public class ReadMetaDataTrackerUnitTest extends BaseTest { set.addEntry(new FakeRODatum(loc, name)); cnt++; } - data.put((long) record.getAlignmentStart() + x, set); + data.put(record.getAlignmentStart() + x, set); } - ReadMetaDataTracker tracker = new ReadMetaDataTracker(record, data); + ReadMetaDataTracker tracker = new ReadMetaDataTracker(genomeLocParser, record, data); return tracker; } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java index f95a3b193..0e5744697 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java @@ -53,12 +53,13 @@ import java.util.Map; public class RMDTrackBuilderUnitTest extends BaseTest { private RMDTrackBuilder builder; private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; @BeforeMethod public void setup() { - builder = new RMDTrackBuilder(); seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); + builder = new RMDTrackBuilder(); } @Test diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java index 8f6d16782..d9b30cdf3 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java +++ 
b/java/test/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIteratorUnitTest.java @@ -33,15 +33,19 @@ public class FlashBackIteratorUnitTest extends BaseTest { private static final int STARTING_CHROMOSOME = 1; private static final int CHROMOSOME_SIZE = 1000; + private String firstContig; + private GenomeLocParser genomeLocParser; + @BeforeMethod public void setup() { - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + firstContig = header.getSequenceDictionary().getSequence(0).getSequenceName(); } @Test public void testBasicIteration() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -55,8 +59,8 @@ public class FlashBackIteratorUnitTest extends BaseTest { @Test public void testBasicIterationThenFlashBack() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -66,13 +70,13 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 2)); + iter.flashBackTo(genomeLocParser.createGenomeLoc(firstContig, 2)); } @Test public void testBasicIterationThenFlashBackThenIterate() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(loc)); + 
GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -82,7 +86,7 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 1)); + iter.flashBackTo(genomeLocParser.createGenomeLoc(firstContig, 1)); int count = 0; while (iter.hasNext()) { count++; @@ -94,8 +98,8 @@ public class FlashBackIteratorUnitTest extends BaseTest { @Test public void testFlashBackTruth() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - LocationAwareSeekableRODIterator backIter = new FakeSeekableRODIterator(loc); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + LocationAwareSeekableRODIterator backIter = new FakeSeekableRODIterator(genomeLocParser,loc); // remove the first three records backIter.next(); backIter.next(); @@ -110,16 +114,16 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - Assert.assertTrue(iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 5))); - Assert.assertTrue(iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 15))); - Assert.assertTrue(!iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 2))); - Assert.assertTrue(!iter.canFlashBackTo(GenomeLocParser.createGenomeLoc(0, 1))); + Assert.assertTrue(iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 5))); + Assert.assertTrue(iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 15))); + Assert.assertTrue(!iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 2))); + Assert.assertTrue(!iter.canFlashBackTo(genomeLocParser.createGenomeLoc(firstContig, 1))); } @Test public void testBasicIterationThenFlashBackHalfWayThenIterate() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 0, 0); - FlashBackIterator iter = new FlashBackIterator(new 
FakeSeekableRODIterator(loc)); + GenomeLoc loc = genomeLocParser.createGenomeLoc(firstContig, 0, 0); + FlashBackIterator iter = new FlashBackIterator(new FakeSeekableRODIterator(genomeLocParser,loc)); GenomeLoc lastLocation = null; for (int x = 0; x < 10; x++) { iter.next(); @@ -129,7 +133,7 @@ public class FlashBackIteratorUnitTest extends BaseTest { } lastLocation = cur; } - iter.flashBackTo(GenomeLocParser.createGenomeLoc(0, 5)); + iter.flashBackTo(genomeLocParser.createGenomeLoc(firstContig, 5)); int count = 0; while (iter.hasNext()) { count++; @@ -141,15 +145,16 @@ public class FlashBackIteratorUnitTest extends BaseTest { class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { + private GenomeLocParser genomeLocParser; // current location private GenomeLoc location; private FakeRODatum curROD; private int recordCount = 10; - public FakeSeekableRODIterator(GenomeLoc startingLoc) { - this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); - ; + public FakeSeekableRODIterator(GenomeLocParser genomeLocParser,GenomeLoc startingLoc) { + this.genomeLocParser = genomeLocParser; + this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1); } @Override @@ -178,7 +183,7 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator { public RODRecordList next() { RODRecordList list = new FakeRODRecordList(); curROD = new FakeRODatum("STUPIDNAME", location); - location = GenomeLocParser.createGenomeLoc(location.getContigIndex(), location.getStart() + 1, location.getStop() + 1); + location = genomeLocParser.createGenomeLoc(location.getContig(), location.getStart() + 1, location.getStop() + 1); list.add(curROD); recordCount--; return list; diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java 
b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java index b60e81026..e8e3631d5 100644 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseDuplicatesUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -52,16 +53,22 @@ public class TraverseDuplicatesUnitTest extends BaseTest { private TraverseDuplicates obj = new TraverseDuplicates(); private SAMFileHeader header; - + private GenomeLocParser genomeLocParser; + private GenomeAnalysisEngine engine; @BeforeMethod public void doBefore() { header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser =new GenomeLocParser(header.getSequenceDictionary()); + + engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); + + obj.initialize(engine); } @Test - public void testAllDupplicatesNoPairs() { + public void testAllDuplicatesNoPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); @@ -74,7 +81,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testNoDupplicatesNoPairs() { + public void testNoDuplicatesNoPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); @@ -102,7 +109,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testAllDupplicatesAllPairs() { + public void testAllDuplicatesAllPairs() { List list 
= new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ"+ x, 0, 1, 100); @@ -118,7 +125,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testNoDupplicatesAllPairs() { + public void testNoDuplicatesAllPairs() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ"+ x, 0, 1, 100); @@ -134,7 +141,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest { } @Test - public void testAllDupplicatesAllPairsDifferentPairedEnd() { + public void testAllDuplicatesAllPairsDifferentPairedEnd() { List list = new ArrayList(); for (int x = 0; x < 10; x++) { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "SWEET_READ" + x, 0, 1, 100); diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index a44b9d44d..9d642519b 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; @@ -17,7 +18,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import static org.testng.Assert.fail; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; 
@@ -68,12 +68,16 @@ public class TraverseReadsUnitTest extends BaseTest { private TraverseReads traversalEngine = null; private IndexedFastaSequenceFile ref = null; + private GenomeLocParser genomeLocParser = null; + private GenomeAnalysisEngine engine = null; @BeforeClass public void doOnce() { - GenomeLocParserTestUtils.clearSequenceDictionary(); ref = new IndexedFastaSequenceFile(refFile); - GenomeLocParser.setupRefContigOrdering(ref); + genomeLocParser = new GenomeLocParser(ref); + + engine = new GenomeAnalysisEngine(); + engine.setGenomeLocParser(genomeLocParser); } /** @@ -99,17 +103,17 @@ public class TraverseReadsUnitTest extends BaseTest { countReadWalker = new CountReadsWalker(); traversalEngine = new TraverseReads(); - - + traversalEngine.initialize(engine); } /** Test out that we can shard the file and iterate over every read */ @Test public void testUnmappedReadCount() { - SAMDataSource dataSource = new SAMDataSource(bamList); + SAMDataSource dataSource = new SAMDataSource(bamList,genomeLocParser); ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ref.getSequenceDictionary(), - readSize); + readSize, + genomeLocParser); countReadWalker.initialize(); Object accumulator = countReadWalker.reduceInit(); @@ -121,7 +125,7 @@ public class TraverseReadsUnitTest extends BaseTest { fail("Shard == null"); } - ShardDataProvider dataProvider = new ReadShardDataProvider(shard,dataSource.seek(shard),null,null); + ShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null,null); accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator); dataProvider.close(); } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java index ccdb57a87..71774c7da 100644 --- 
a/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java @@ -20,67 +20,45 @@ import org.testng.annotations.Test; * Test out the functionality of the new genome loc parser */ public class GenomeLocParserUnitTest extends BaseTest { - @Test(expectedExceptions=ReviewedStingException.class) - public void testUnsetupException() { - SAMSequenceDictionary contigInfoCache = GenomeLocParser.contigInfo; - GenomeLocParser.contigInfo = null; - try { - GenomeLocParser.createGenomeLoc(0, 0, 0); - } - finally { - GenomeLocParser.contigInfo = contigInfoCache; - } - } + private GenomeLocParser genomeLocParser; @BeforeClass public void init() { - GenomeLocParserTestUtils.clearSequenceDictionary(); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - } - - @Test - public void testKnownContigOrder() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - GenomeLocParser.contigInfo = null; - // assert that it's false when the contig ordering is not setup - assertTrue(!GenomeLocParser.hasKnownContigOrdering()); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - // assert that it's true when it is setup - assertTrue(GenomeLocParser.hasKnownContigOrdering()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } @Test(expectedExceptions=RuntimeException.class) public void testGetContigIndex() { - assertEquals(GenomeLocParser.getContigIndex("blah",true), -1); // should not be in the reference + assertEquals(genomeLocParser.getContigIndex("blah",true), -1); // should not be in the reference } @Test public void testGetContigIndexValid() { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - assertEquals(GenomeLocParser.getContigIndex("chr1",true), 0); // should be in the reference + 
assertEquals(genomeLocParser.getContigIndex("chr1",true), 0); // should be in the reference } @Test public void testGetContigInfoUnknownContig() { - assertEquals(null, GenomeLocParser.getContigInfo("blah")); // should be in the reference + assertEquals(null, genomeLocParser.getContigInfo("blah")); // should be in the reference } @Test public void testGetContigInfoKnownContig() { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10); - assertEquals(0, "chr1".compareTo(GenomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference + assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference } @Test(expectedExceptions=ReviewedStingException.class) public void testParseBadString() { - GenomeLocParser.parseGenomeLoc("Bad:0-1"); + genomeLocParser.parseGenomeLoc("Bad:0-1"); } @Test public void testParseGoodString() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-100"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-100"); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 100); assertEquals(loc.getStart(), 1); @@ -88,7 +66,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc1() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 100); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 100); assertEquals(loc.getStart(), 1); @@ -96,7 +74,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc1point5() { // in honor of VAAL! 
- GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1"); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 1); assertEquals(loc.getStart(), 1); @@ -104,23 +82,23 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc2() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); - assertEquals(0, loc.getContigIndex()); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100); + assertEquals("chr1", loc.getContig()); assertEquals(loc.getStop(), 100); assertEquals(loc.getStart(), 1); } @Test public void testCreateGenomeLoc3() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1); - assertEquals(0, loc.getContigIndex()); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1); + assertEquals("chr1", loc.getContig()); assertEquals(loc.getStop(), 1); assertEquals(loc.getStart(), 1); } @Test public void testCreateGenomeLoc4() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1); assertEquals(0, loc.getContigIndex()); assertEquals(loc.getStop(), 1); assertEquals(loc.getStart(), 1); @@ -128,8 +106,8 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testCreateGenomeLoc5() { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100); - GenomeLoc copy = GenomeLocParser.createGenomeLoc(loc); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100); + GenomeLoc copy = genomeLocParser.createGenomeLoc(loc.getContig(),loc.getStart(),loc.getStop()); assertEquals(0, copy.getContigIndex()); assertEquals(copy.getStop(), 100); assertEquals(copy.getStart(), 1); @@ -137,7 +115,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testGenomeLocPlusSign() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1+"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1+"); assertEquals(loc.getContigIndex(), 0); 
assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -145,7 +123,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testGenomeLocParseOnlyChrome() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -153,7 +131,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocParseOnlyBadChrome() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr12"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr12"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -161,7 +139,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocBad() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -169,7 +147,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocBad2() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-500-0"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-500-0"); assertEquals(loc.getContigIndex(), 0); assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -177,7 +155,7 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testGenomeLocBad3() { - GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1--0"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1--0"); assertEquals(loc.getContigIndex(), 0); 
assertEquals(loc.getStop(), 10); // the size assertEquals(loc.getStart(), 1); @@ -186,19 +164,19 @@ public class GenomeLocParserUnitTest extends BaseTest { // test out the validating methods @Test public void testValidationOfGenomeLocs() { - assertTrue(GenomeLocParser.validGenomeLoc("chr1",1,1)); - assertTrue(!GenomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop - assertTrue(!GenomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end + assertTrue(genomeLocParser.validGenomeLoc("chr1",1,1)); + assertTrue(!genomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry + assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig + assertTrue(!genomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start + assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop + assertTrue(!genomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end - assertTrue(GenomeLocParser.validGenomeLoc(0,1,1)); - assertTrue(!GenomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry - assertTrue(!GenomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig - assertTrue(!GenomeLocParser.validGenomeLoc(-1,0,10)); // bad start - assertTrue(!GenomeLocParser.validGenomeLoc(0,1,-2)); // bad stop - assertTrue(!GenomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end + assertTrue(genomeLocParser.validGenomeLoc(0,1,1)); + assertTrue(!genomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry + assertTrue(!genomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig + assertTrue(!genomeLocParser.validGenomeLoc(-1,0,10)); // bad start + assertTrue(!genomeLocParser.validGenomeLoc(0,1,-2)); // bad stop + 
assertTrue(!genomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end } } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java index fc7099b7b..8e4ff8a2c 100755 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java @@ -48,20 +48,23 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { private static final int STARTING_CHROMOSOME = 1; private static final int CHROMOSOME_SIZE = 1000; + private GenomeLocParser genomeLocParser; + private String contigOneName; + @BeforeClass public void setup() { - GenomeLocParserTestUtils.clearSequenceDictionary(); - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + contigOneName = header.getSequenceDictionary().getSequence(1).getSequenceName(); } @BeforeMethod public void initializeSortedSet() { - mSortedSet = new GenomeLocSortedSet(); + mSortedSet = new GenomeLocSortedSet(genomeLocParser); } @Test public void testAdd() { - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -70,7 +73,7 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void testRemove() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.remove(g); @@ -80,9 +83,9 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void addRegion() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 1, 50); + GenomeLoc g = 
genomeLocParser.createGenomeLoc(contigOneName, 1, 50); mSortedSet.add(g); - GenomeLoc f = GenomeLocParser.createGenomeLoc(1, 30, 80); + GenomeLoc f = genomeLocParser.createGenomeLoc(contigOneName, 30, 80); mSortedSet.addRegion(f); assertTrue(mSortedSet.size() == 1); @@ -92,7 +95,7 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test(expectedExceptions=ReviewedStingException.class) public void testAddDuplicate() { assertTrue(mSortedSet.size() == 0); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); mSortedSet.add(g); @@ -100,8 +103,8 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void mergingOverlappingBelow() { - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 50); - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 49, 100); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 0, 50); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -116,8 +119,8 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void mergingOverlappingAbove() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 49, 100); assertTrue(mSortedSet.size() == 0); mSortedSet.add(g); assertTrue(mSortedSet.size() == 1); @@ -132,22 +135,22 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void deleteAllByRegion() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 101; x++) { - GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x); - 
mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(del)); + GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName,x,x); + mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser,del)); } assertTrue(mSortedSet.isEmpty()); } @Test public void deleteSomeByRegion() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 100); mSortedSet.add(e); for (int x = 1; x < 50; x++) { - GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x); - mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(del)); + GenomeLoc del = genomeLocParser.createGenomeLoc(contigOneName,x,x); + mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser,del)); } assertTrue(!mSortedSet.isEmpty()); assertTrue(mSortedSet.size() == 1); @@ -159,14 +162,14 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void deleteSuperRegion() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 10, 20); - GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 70, 100); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 10, 20); + GenomeLoc g = genomeLocParser.createGenomeLoc(contigOneName, 70, 100); mSortedSet.add(g); mSortedSet.addRegion(e); assertTrue(mSortedSet.size() == 2); // now delete a region - GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 15, 75); - mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(d)); + GenomeLoc d = genomeLocParser.createGenomeLoc(contigOneName, 15, 75); + mSortedSet = mSortedSet.subtractRegions(new GenomeLocSortedSet(genomeLocParser,d)); Iterator iter = mSortedSet.iterator(); GenomeLoc loc = iter.next(); assertTrue(loc.getStart() == 10); @@ -181,13 +184,13 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { @Test public void substractComplexExample() { - GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 20); + GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 1, 20); 
mSortedSet.add(e); - GenomeLoc r1 = GenomeLocParser.createGenomeLoc(1, 3, 5); - GenomeLoc r2 = GenomeLocParser.createGenomeLoc(1, 10, 12); - GenomeLoc r3 = GenomeLocParser.createGenomeLoc(1, 16, 18); - GenomeLocSortedSet toExclude = new GenomeLocSortedSet(Arrays.asList(r1, r2, r3)); + GenomeLoc r1 = genomeLocParser.createGenomeLoc(contigOneName, 3, 5); + GenomeLoc r2 = genomeLocParser.createGenomeLoc(contigOneName, 10, 12); + GenomeLoc r3 = genomeLocParser.createGenomeLoc(contigOneName, 16, 18); + GenomeLocSortedSet toExclude = new GenomeLocSortedSet(genomeLocParser,Arrays.asList(r1, r2, r3)); GenomeLocSortedSet remaining = mSortedSet.subtractRegions(toExclude); // logger.debug("Initial " + mSortedSet); @@ -204,10 +207,10 @@ public class GenomeLocSortedSetUnitTest extends BaseTest { GenomeLoc p3 = it.next(); GenomeLoc p4 = it.next(); - assertEquals(GenomeLocParser.createGenomeLoc(1, 1, 2), p1); - assertEquals(GenomeLocParser.createGenomeLoc(1, 6, 9), p2); - assertEquals(GenomeLocParser.createGenomeLoc(1, 13, 15), p3); - assertEquals(GenomeLocParser.createGenomeLoc(1, 19, 20), p4); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 1, 2), p1); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 6, 9), p2); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 13, 15), p3); + assertEquals(genomeLocParser.createGenomeLoc(contigOneName, 19, 20), p4); } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java index 27e694bf4..81d6446bf 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java @@ -21,13 +21,13 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; */ public class GenomeLocUnitTest extends BaseTest { private static ReferenceSequenceFile seq; + private GenomeLocParser genomeLocParser; @BeforeClass public void init() throws FileNotFoundException { // 
sequence - GenomeLocParserTestUtils.clearSequenceDictionary(); seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } /** @@ -37,10 +37,10 @@ public class GenomeLocUnitTest extends BaseTest { public void testIsBetween() { logger.warn("Executing testIsBetween"); - GenomeLoc locMiddle = GenomeLocParser.createGenomeLoc("chr1", 3, 3); + GenomeLoc locMiddle = genomeLocParser.createGenomeLoc("chr1", 3, 3); - GenomeLoc locLeft = GenomeLocParser.createGenomeLoc("chr1", 1, 1); - GenomeLoc locRight = GenomeLocParser.createGenomeLoc("chr1", 5, 5); + GenomeLoc locLeft = genomeLocParser.createGenomeLoc("chr1", 1, 1); + GenomeLoc locRight = genomeLocParser.createGenomeLoc("chr1", 5, 5); Assert.assertTrue(locMiddle.isBetween(locLeft, locRight)); Assert.assertFalse(locLeft.isBetween(locMiddle, locRight)); @@ -50,15 +50,15 @@ public class GenomeLocUnitTest extends BaseTest { @Test public void testContigIndex() { logger.warn("Executing testContigIndex"); - GenomeLoc locOne = GenomeLocParser.createGenomeLoc("chr1",1,1); + GenomeLoc locOne = genomeLocParser.createGenomeLoc("chr1",1,1); Assert.assertEquals(1, locOne.getContigIndex()); Assert.assertEquals("chr1", locOne.getContig()); - GenomeLoc locX = GenomeLocParser.createGenomeLoc("chrX",1,1); + GenomeLoc locX = genomeLocParser.createGenomeLoc("chrX",1,1); Assert.assertEquals(23, locX.getContigIndex()); Assert.assertEquals("chrX", locX.getContig()); - GenomeLoc locNumber = GenomeLocParser.createGenomeLoc(1,1,1); + GenomeLoc locNumber = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(),1,1); Assert.assertEquals(1, locNumber.getContigIndex()); Assert.assertEquals("chr1", locNumber.getContig()); Assert.assertEquals(0, locOne.compareTo(locNumber)); @@ -68,15 +68,15 @@ public class GenomeLocUnitTest extends BaseTest { @Test public void testCompareTo() { logger.warn("Executing testCompareTo"); - 
GenomeLoc twoOne = GenomeLocParser.createGenomeLoc("chr2", 1); - GenomeLoc twoFive = GenomeLocParser.createGenomeLoc("chr2", 5); - GenomeLoc twoOtherFive = GenomeLocParser.createGenomeLoc("chr2", 5); + GenomeLoc twoOne = genomeLocParser.createGenomeLoc("chr2", 1); + GenomeLoc twoFive = genomeLocParser.createGenomeLoc("chr2", 5); + GenomeLoc twoOtherFive = genomeLocParser.createGenomeLoc("chr2", 5); Assert.assertEquals(twoFive.compareTo(twoOtherFive), 0); Assert.assertEquals(twoOne.compareTo(twoFive), -1); Assert.assertEquals(twoFive.compareTo(twoOne), 1); - GenomeLoc oneOne = GenomeLocParser.createGenomeLoc("chr1", 5); + GenomeLoc oneOne = genomeLocParser.createGenomeLoc("chr1", 5); Assert.assertEquals(oneOne.compareTo(twoOne), -1); Assert.assertEquals(twoOne.compareTo(oneOne), 1); } diff --git a/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java b/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java index 364c11351..8a470303d 100644 --- a/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.bed; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -18,25 +17,25 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; public class BedParserUnitTest extends BaseTest { private static IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; private File bedFile = new File("testdata/sampleBedFile.bed"); @BeforeClass public void beforeTests() { - GenomeLocParserTestUtils.clearSequenceDictionary(); seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } @Test public void testLoadBedFile() { - BedParser parser = new BedParser(bedFile); + 
BedParser parser = new BedParser(genomeLocParser,bedFile); List location = parser.getLocations(); Assert.assertEquals(location.size(), 4); } @Test public void testBedParsing() { - BedParser parser = new BedParser(bedFile); + BedParser parser = new BedParser(genomeLocParser,bedFile); List location = parser.getLocations(); Assert.assertEquals(location.size(), 4); Assert.assertTrue(location.get(0).getContig().equals("20")); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java index cc8c61948..c2b622738 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterUnitTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype.glf; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; @@ -59,6 +58,9 @@ public class GLFWriterUnitTest extends BaseTest { protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"}; protected final static double SIGNIFICANCE = 5.1; + private IndexedFastaSequenceFile seq; + private GenomeLocParser genomeLocParser; + @BeforeMethod public void before() { @@ -66,10 +68,8 @@ public class GLFWriterUnitTest extends BaseTest { @BeforeClass public void beforeTests() { - GenomeLocParserTestUtils.clearSequenceDictionary(); - IndexedFastaSequenceFile seq; seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } @@ -113,7 +113,7 @@ public class GLFWriterUnitTest extends BaseTest { rec = new GLFWriter(writeTo); rec.writeHeader(header); for (int x = 0; x < 100; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + GenomeLoc loc = 
genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(), x + 1); rec.addCall(new SAMSequenceRecord("test", 0), (int)loc.getStart(), 10, 'A', 9, createLikelihoods(x % 10)); } rec.close(); @@ -131,7 +131,7 @@ public class GLFWriterUnitTest extends BaseTest { rec = new GLFWriter(writeTo); rec.writeHeader(header); for (int x = 0; x < 5; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(), x + 1); rec.addCall(new SAMSequenceRecord("test", 0), (int)loc.getStart(), 10, 'A', 9, createGreaterThan255MinimumGenotype(x % 10)); } rec.close(); @@ -150,7 +150,7 @@ public class GLFWriterUnitTest extends BaseTest { rec = new GLFWriter(writeTo); rec.writeHeader(header); for (int x = 0; x < 100; x++) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + GenomeLoc loc = genomeLocParser.createGenomeLoc(seq.getSequenceDictionary().getSequence(1).getSequenceName(), x + 1); rec.addCall(new SAMSequenceRecord("test", 0), (int)loc.getStart(), 10, 'A', 9, createLikelihoods(x % 10)); } rec.close(); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index b0602e6f6..4f3a43e4d 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -6,7 +6,6 @@ import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.utils.GenomeLocParserTestUtils; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -36,12 +35,12 @@ public class 
VCFWriterUnitTest extends BaseTest { private Set metaData = new HashSet(); private Set additionalColumns = new HashSet(); private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf"); + private GenomeLocParser genomeLocParser; @BeforeClass public void beforeTests() { - GenomeLocParserTestUtils.clearSequenceDictionary(); IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } /** test, using the writer and reader, that we can output and input a VCF file without problems */ @@ -110,7 +109,7 @@ public class VCFWriterUnitTest extends BaseTest { */ private VariantContext createVC(VCFHeader header) { - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1",1); + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1",1); List alleles = new ArrayList(); Set filters = null; Map attributes = new HashMap(); diff --git a/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java b/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java index e48e3e4c0..752695052 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/IntervalFileMergingIteratorUnitTest.java @@ -55,23 +55,25 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { private static List results1 = null; private static List results2 = null; + private GenomeLocParser genomeLocParser; + @BeforeClass public void init() { - GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); + genomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); results1 = new ArrayList(); results2 = new ArrayList(); - results1.add(GenomeLocParser.createGenomeLoc("chr1",1554)); - results1.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); 
- results1.add(GenomeLocParser.createGenomeLoc("chr1",18932,19000)); - results1.add(GenomeLocParser.createGenomeLoc("chr1",19001,25000)); - results1.add(GenomeLocParser.createGenomeLoc("chr5",7415,7600)); + results1.add(genomeLocParser.createGenomeLoc("chr1",1554)); + results1.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); + results1.add(genomeLocParser.createGenomeLoc("chr1",18932,19000)); + results1.add(genomeLocParser.createGenomeLoc("chr1",19001,25000)); + results1.add(genomeLocParser.createGenomeLoc("chr5",7415,7600)); - results2.add(GenomeLocParser.createGenomeLoc("chr1",1554)); - results2.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); - results2.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); - results2.add(GenomeLocParser.createGenomeLoc("chr5",7415,7600)); + results2.add(genomeLocParser.createGenomeLoc("chr1",1554)); + results2.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); + results2.add(genomeLocParser.createGenomeLoc("chr1",18932,25000)); + results2.add(genomeLocParser.createGenomeLoc("chr5",7415,7600)); } @@ -79,7 +81,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testGATKIntervalFileIterator_Overlap() { logger.warn("Executing testGATKIntervalFileIterator_Overlap"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); Iterator check_it = results1.iterator(); while(it.hasNext()) { GenomeLoc l = it.next(); @@ -93,7 +95,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testGATKIntervalFileIterator_OverlapWithException() { logger.warn("Executing testGATKIntervalFileIterator_OverlapWithException"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); + Iterator it = new 
IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameGATK),IntervalMergingRule.OVERLAPPING_ONLY); Iterator check_it = results1.iterator(); try { while(it.hasNext()) { @@ -110,7 +112,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testGATKIntervalFileIterator_All() { logger.warn("Executing testGATKIntervalFileIterator_All"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameGATK),IntervalMergingRule.ALL); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameGATK),IntervalMergingRule.ALL); Iterator check_it = results2.iterator(); while(it.hasNext()) { GenomeLoc l = it.next(); @@ -124,7 +126,7 @@ public class IntervalFileMergingIteratorUnitTest extends BaseTest { public void testBEDIntervalFileIterator_Overlap() { logger.warn("Executing testBEDIntervalFileIterator_Overlap"); - Iterator it = new IntervalFileMergingIterator(new File(intervalFileNameBED),IntervalMergingRule.OVERLAPPING_ONLY); + Iterator it = new IntervalFileMergingIterator(genomeLocParser,new File(intervalFileNameBED),IntervalMergingRule.OVERLAPPING_ONLY); Iterator check_it = results1.iterator(); while(it.hasNext()) { GenomeLoc l = it.next(); diff --git a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java index a74cc54e7..d4fcb8b9f 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsTest.java @@ -21,13 +21,12 @@ import java.util.List; public class IntervalUtilsTest extends BaseTest { // used to seed the genome loc parser with a sequence dictionary private static ReferenceSequenceFile seq; - - + private GenomeLocParser genomeLocParser; @BeforeClass public void init() throws FileNotFoundException { seq = new IndexedFastaSequenceFile(new File(hg18Reference)); - 
GenomeLocParser.setupRefContigOrdering(seq); + genomeLocParser = new GenomeLocParser(seq); } @@ -40,9 +39,9 @@ public class IntervalUtilsTest extends BaseTest { // create the two lists we'll use for (int x = 1; x < 101; x++) { if (x % 2 == 0) - listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x)); else - listEveryTwoFromOne.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromOne.add(genomeLocParser.createGenomeLoc("chr1",x,x)); } List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION); @@ -60,8 +59,8 @@ public class IntervalUtilsTest extends BaseTest { // create the two lists we'll use for (int x = 1; x < 101; x++) { if (x % 2 == 0) - listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); - allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x)); + allSites.add(genomeLocParser.createGenomeLoc("chr1",x,x)); } List ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION); @@ -79,8 +78,8 @@ public class IntervalUtilsTest extends BaseTest { // create the two lists we'll use for (int x = 1; x < 101; x++) { if (x % 5 == 0) { - listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); - allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x)); + listEveryTwoFromTwo.add(genomeLocParser.createGenomeLoc("chr1",x,x)); + allSites.add(genomeLocParser.createGenomeLoc("chr1",x,x)); } } diff --git a/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java b/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java index 63d84f082..0b4e52a3d 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java +++ 
b/java/test/org/broadinstitute/sting/utils/interval/NwayIntervalMergingIteratorUnitTest.java @@ -47,6 +47,7 @@ import java.io.File; public class NwayIntervalMergingIteratorUnitTest extends BaseTest { private static File refFile = new File(validationDataLocation + "Homo_sapiens_assembly17.fasta"); + private GenomeLocParser genomeLocParser; private static List stream1 = null; private static List stream2 = null; @@ -54,32 +55,32 @@ public class NwayIntervalMergingIteratorUnitTest extends BaseTest { @BeforeClass public static void init() { - GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); + GenomeLocParser genomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(refFile)); stream1 = new ArrayList(); stream2 = new ArrayList(); expected = new ArrayList(); - stream1.add(GenomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",2600,2610)); // 4 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",2609,2625)); // 4 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",18932,19000)); // 6 - stream1.add(GenomeLocParser.createGenomeLoc("chr1",19001,25000)); //6 + stream1.add(genomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 + stream1.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 + stream1.add(genomeLocParser.createGenomeLoc("chr1",2600,2610)); // 4 + stream1.add(genomeLocParser.createGenomeLoc("chr1",2609,2625)); // 4 + stream1.add(genomeLocParser.createGenomeLoc("chr1",18932,19000)); // 6 + stream1.add(genomeLocParser.createGenomeLoc("chr1",19001,25000)); //6 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",2598,2604)); // 4 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 - stream2.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 - 
stream2.add(GenomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 + stream2.add(genomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 + stream2.add(genomeLocParser.createGenomeLoc("chr1",2598,2604)); // 4 + stream2.add(genomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 + stream2.add(genomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 + stream2.add(genomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 - expected.add(GenomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 - expected.add(GenomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 - expected.add(GenomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 - expected.add(GenomeLocParser.createGenomeLoc("chr1",2598,2625)); // 4 - expected.add(GenomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 - expected.add(GenomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 - expected.add(GenomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 + expected.add(genomeLocParser.createGenomeLoc("chr1",1554,1560)); // 1 + expected.add(genomeLocParser.createGenomeLoc("chr1",1565,1570)); //2 + expected.add(genomeLocParser.createGenomeLoc("chr1",2538,2568)); // 3 + expected.add(genomeLocParser.createGenomeLoc("chr1",2598,2625)); // 4 + expected.add(genomeLocParser.createGenomeLoc("chr1",7415,7600)); // 5 + expected.add(genomeLocParser.createGenomeLoc("chr1",18932,25000)); // 6 + expected.add(genomeLocParser.createGenomeLoc("chr1",30000,35000)); // 7 } diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala index a760b220b..f444044bf 100644 --- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunction.scala @@ -60,13 +60,13 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction { object IntervalScatterFunction { private def 
parseLocs(referenceSource: ReferenceDataSource, intervals: List[String]) = { - GenomeLocParser.setupRefContigOrdering(referenceSource.getReference) + var genomeLocParser: GenomeLocParser = new GenomeLocParser(referenceSource.getReference) val locs = { // TODO: Abstract genome analysis engine has richer logic for parsing. We need to use it! if (intervals.size == 0) { GenomeLocSortedSet.createSetFromSequenceDictionary(referenceSource.getReference.getSequenceDictionary) } else { - new GenomeLocSortedSet(IntervalUtils.parseIntervalArguments(intervals, false)) + new GenomeLocSortedSet(genomeLocParser,IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, false)) } } if (locs == null || locs.size == 0) diff --git a/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala index 1998b6c76..19c34071e 100755 --- a/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala +++ b/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala @@ -13,8 +13,8 @@ class PipelineUtils { object PipelineUtils{ def smartSplitContigs(reference: File, intervals: File, sets: Int) : List[List[String]] = { - GenomeLocParser.setupRefContigOrdering(ReferenceSequenceFileFactory.getReferenceSequenceFile(reference)) - val targets = IntervalUtils.parseIntervalArguments(List(intervals.getAbsolutePath), false) + var genomeLocParser: GenomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(reference)) + val targets = IntervalUtils.parseIntervalArguments(genomeLocParser,List(intervals.getAbsolutePath), false) // Build up a map of contigs with sizes. 
var contigSizes = Map.empty[String, Long] diff --git a/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala b/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala index 42ac83aa4..bcd2e254e 100644 --- a/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala +++ b/scala/test/org/broadinstitute/sting/queue/extensions/gatk/IntervalScatterFunctionUnitTest.scala @@ -8,17 +8,17 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils import org.broadinstitute.sting.queue.QException import net.sf.picard.reference.IndexedFastaSequenceFile import org.testng.annotations.{Test, BeforeMethod} -import org.broadinstitute.sting.utils.{GenomeLocParserTestUtils, GenomeLocParser} +import org.broadinstitute.sting.utils.GenomeLocParser class IntervalScatterFunctionUnitTest extends BaseTest { private def reference = new File(BaseTest.b36KGReference) private var header: IndexedFastaSequenceFile = _ + private var genomeLocParser: GenomeLocParser = _ @BeforeMethod def setup() { - GenomeLocParserTestUtils.clearSequenceDictionary() header = new IndexedFastaSequenceFile(reference) - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()) + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()) } @Test @@ -30,17 +30,17 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testBasicScatter = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") val files = (1 to 3).toList.map(index => new File(testDir + "basic." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3"), files, false) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -53,18 +53,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterLessFiles = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") - val chr4 = GenomeLocParser.parseGenomeInterval("4") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") + val chr4 = genomeLocParser.parseGenomeInterval("4") val files = (1 to 3).toList.map(index => new File(testDir + "less." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3", "4"), files, false) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(2, locs1.size) Assert.assertEquals(1, locs2.size) @@ -85,18 +85,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterIntervals = { val intervals = List("1:1-2", "1:4-5", "2:1-1", "3:2-2") - val chr1a = GenomeLocParser.parseGenomeInterval("1:1-2") - val chr1b = GenomeLocParser.parseGenomeInterval("1:4-5") - val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1a = genomeLocParser.parseGenomeInterval("1:1-2") + val chr1b = genomeLocParser.parseGenomeInterval("1:4-5") + val chr2 = genomeLocParser.parseGenomeInterval("2:1-1") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "split." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(2, locs1.size) Assert.assertEquals(1, locs2.size) @@ -111,17 +111,17 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterOrder = { val intervals = List("2:1-1", "1:1-1", "3:2-2") - val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1") - val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1 = genomeLocParser.parseGenomeInterval("1:1-1") + val chr2 = genomeLocParser.parseGenomeInterval("2:1-1") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "split." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -134,17 +134,17 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testBasicScatterByContig = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") val files = (1 to 3).toList.map(index => new File(testDir + "contig_basic." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3"), files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -157,18 +157,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigLessFiles = { - val chr1 = GenomeLocParser.parseGenomeInterval("1") - val chr2 = GenomeLocParser.parseGenomeInterval("2") - val chr3 = GenomeLocParser.parseGenomeInterval("3") - val chr4 = GenomeLocParser.parseGenomeInterval("4") + val chr1 = genomeLocParser.parseGenomeInterval("1") + val chr2 = genomeLocParser.parseGenomeInterval("2") + val chr3 = genomeLocParser.parseGenomeInterval("3") + val chr4 = genomeLocParser.parseGenomeInterval("4") val files = (1 to 3).toList.map(index => new File(testDir + "contig_less." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, List("1", "2", "3", "4"), files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size) @@ -189,18 +189,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigIntervalsStart = { val intervals = List("1:1-2", "1:4-5", "2:1-1", "3:2-2") - val chr1a = GenomeLocParser.parseGenomeInterval("1:1-2") - val chr1b = GenomeLocParser.parseGenomeInterval("1:4-5") - val chr2 = GenomeLocParser.parseGenomeInterval("2:1-1") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1a = genomeLocParser.parseGenomeInterval("1:1-2") + val chr1b = genomeLocParser.parseGenomeInterval("1:4-5") + val chr2 = genomeLocParser.parseGenomeInterval("2:1-1") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_start." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(2, locs1.size) Assert.assertEquals(1, locs2.size) @@ -215,18 +215,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigIntervalsMiddle = { val intervals = List("1:1-1", "2:1-2", "2:4-5", "3:2-2") - val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1") - val chr2a = GenomeLocParser.parseGenomeInterval("2:1-2") - val chr2b = GenomeLocParser.parseGenomeInterval("2:4-5") - val chr3 = GenomeLocParser.parseGenomeInterval("3:2-2") + val chr1 = genomeLocParser.parseGenomeInterval("1:1-1") + val chr2a = genomeLocParser.parseGenomeInterval("2:1-2") + val chr2b = genomeLocParser.parseGenomeInterval("2:4-5") + val chr3 = genomeLocParser.parseGenomeInterval("3:2-2") val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_middle." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(2, locs2.size) @@ -241,18 +241,18 @@ class IntervalScatterFunctionUnitTest extends BaseTest { @Test def testScatterByContigIntervalsEnd = { val intervals = List("1:1-1", "2:2-2", "3:1-2", "3:4-5") - val chr1 = GenomeLocParser.parseGenomeInterval("1:1-1") - val chr2 = GenomeLocParser.parseGenomeInterval("2:2-2") - val chr3a = GenomeLocParser.parseGenomeInterval("3:1-2") - val chr3b = GenomeLocParser.parseGenomeInterval("3:4-5") + val chr1 = genomeLocParser.parseGenomeInterval("1:1-1") + val chr2 = genomeLocParser.parseGenomeInterval("2:2-2") + val chr3a = genomeLocParser.parseGenomeInterval("3:1-2") + val chr3b = genomeLocParser.parseGenomeInterval("3:4-5") val files = (1 to 3).toList.map(index => new File(testDir + "contig_split_end." 
+ index + ".intervals")) IntervalScatterFunction.scatter(reference, intervals, files, true) - val locs1 = IntervalUtils.parseIntervalArguments(List(files(0).toString), false) - val locs2 = IntervalUtils.parseIntervalArguments(List(files(1).toString), false) - val locs3 = IntervalUtils.parseIntervalArguments(List(files(2).toString), false) + val locs1 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(0).toString), false) + val locs2 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(1).toString), false) + val locs3 = IntervalUtils.parseIntervalArguments(genomeLocParser,List(files(2).toString), false) Assert.assertEquals(1, locs1.size) Assert.assertEquals(1, locs2.size)