From 5c3198520c03b903c08be7584f88955627e87ce0 Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 3 Feb 2011 17:59:19 +0000 Subject: [PATCH] A few minor modifications masquerading as significant changes according to svn's logs: - Copied BAM indexing engine from Picard back into the GATK anticipating shard merging algorithm. Tried to leave most of the building blocks in Picard. If this turns into a logistical nightmare, I'll merge the building blocks into the GATK as well. - Reorganized the org.broadinstitute.sting.gatk.datasources package, giving better separation of query and management functionality for reads, ref, rmd, and samples. - Merged Shard building blocks into org.broadinstitute.sting.gatk.datasources.reads package, indicating its current strong relationship with the reads, rather than the general unifying element I wish this would be. - Collapsed BAMFormatAwareShard into Shard. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5184 348d0f76-0448-11de-a6fe-93d51630548a --- .../reference/FastaSequenceIndexBuilder.java | 2 +- java/src/net/sf/samtools/GATKBAMFileSpan.java | 59 ++ java/src/net/sf/samtools/GATKBin.java | 65 ++ java/src/net/sf/samtools/GATKBinList.java | 51 ++ java/src/net/sf/samtools/GATKChunk.java | 67 ++ .../sting/gatk/CommandLineExecutable.java | 2 +- .../sting/gatk/GenomeAnalysisEngine.java | 18 +- .../sting/gatk/ReadProperties.java | 3 +- .../sting/gatk/WalkerManager.java | 3 +- .../DataSourceGenerationException.java | 31 - .../providers/LocusShardDataProvider.java | 4 +- .../ManagingReferenceOrderedView.java | 2 +- .../ReadBasedReferenceOrderedView.java | 4 +- .../providers/ReadShardDataProvider.java | 5 +- .../datasources/providers/RodLocusView.java | 3 +- .../providers/ShardDataProvider.java | 4 +- .../datasources/reads/BAMIndexContent.java | 195 ++++++ .../gatk/datasources/reads/BAMOverlap.java | 29 + .../gatk/datasources/reads/FilePointer.java | 71 +++ .../reads/GATKBAMFileConstants.java | 51 ++ 
.../gatk/datasources/reads/GATKBAMIndex.java | 575 ++++++++++++++++++ .../{shards => reads}/IntervalSharder.java | 141 +---- .../{shards => reads}/LocusShard.java | 11 +- .../{shards => reads}/LocusShardStrategy.java | 6 +- .../{shards => reads}/MonolithicShard.java | 7 +- .../MonolithicShardStrategy.java | 3 +- .../{shards => reads}/ReadShard.java | 9 +- .../{shards => reads}/ReadShardStrategy.java | 9 +- .../gatk/datasources/reads/ReaderBin.java | 33 + .../SAMDataSource.java | 24 +- .../SAMReaderID.java | 4 +- .../sting/gatk/datasources/reads/Shard.java | 215 +++++++ .../{shards => reads}/ShardStrategy.java | 2 +- .../ShardStrategyFactory.java | 3 +- .../gatk/datasources/reads/package-info.java | 4 + .../{ => reads}/utilities/BAMFileStat.java | 2 +- .../{ => reads}/utilities/BAMTagRenamer.java | 2 +- .../reads/utilities/package-info.java | 4 + .../ReferenceDataSource.java | 3 +- .../ReferenceDataSourceProgressListener.java | 2 +- .../datasources/reference/package-info.java | 4 + .../datasources/rmd/DataStreamSegment.java | 31 + .../gatk/datasources/rmd/EntireStream.java | 31 + .../datasources/rmd/MappedStreamSegment.java | 47 ++ .../rmd/ReferenceOrderedDataPool.java | 152 +++++ .../ReferenceOrderedDataSource.java | 171 +----- .../ResourcePool.java | 82 +-- .../gatk/datasources/rmd/package-info.java | 4 + .../shards/BAMFormatAwareShard.java | 117 ---- .../sting/gatk/datasources/shards/Shard.java | 132 ---- .../gatk/datasources/shards/package-info.java | 6 - .../simpleDataSources/SimpleDataSource.java | 37 -- .../simpleDataSources/package-info.java | 6 - .../executive/HierarchicalMicroScheduler.java | 12 +- .../gatk/executive/LinearMicroScheduler.java | 8 +- .../sting/gatk/executive/MicroScheduler.java | 10 +- .../sting/gatk/executive/ShardTraverser.java | 2 +- .../sting/gatk/executive/WindowMaker.java | 4 +- .../refdata/utils/RMDIntervalGenerator.java | 3 +- .../gatk/traversals/TraversalEngine.java | 2 +- .../gatk/traversals/TraverseReadPairs.java | 2 +- 
.../sting/gatk/walkers/VariantsToVCF.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 2 +- .../genomicannotator/GenomicAnnotator.java | 2 +- .../TranscriptToGenomicInfo.java | 2 +- .../beagle/BeagleOutputToVCFWalker.java | 3 +- .../beagle/ProduceBeagleInputWalker.java | 1 - .../filters/VariantFiltrationWalker.java | 2 +- .../walkers/genotyper/UGCallVariants.java | 2 +- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../indels/IndelGenotyperV2Walker.java | 5 +- .../gatk/walkers/indels/IndelRealigner.java | 2 +- .../gatk/walkers/qc/ProfileRodSystem.java | 2 +- .../recalibration/CountCovariatesWalker.java | 2 +- .../TableRecalibrationWalker.java | 2 +- .../varianteval/VariantEvalWalker.java | 2 +- .../ApplyVariantCuts.java | 2 +- .../GenerateVariantClustersWalker.java | 2 +- .../VariantRecalibrator.java | 2 +- .../variantutils/ValidateVariants.java | 2 +- .../walkers/DetectWGAWalker.java | 4 +- .../walkers/VCF4WriterTestWalker.java | 3 +- .../gatk/walkers/ValidationGenotyper.java | 2 +- .../validation/RodSystemValidationWalker.java | 3 +- .../sting/utils/interval/IntervalUtils.java | 2 +- .../sting/utils/vcf/VCFUtils.java | 6 +- .../FastaSequenceIndexBuilderUnitTest.java | 2 +- .../providers/LocusReferenceViewUnitTest.java | 5 +- .../providers/LocusViewTemplate.java | 12 +- .../ReferenceOrderedViewUnitTest.java | 6 +- .../{shards => reads}/MockLocusShard.java | 7 +- .../SAMBAMDataSourceUnitTest.java | 11 +- .../ReferenceOrderedDataPoolUnitTest.java | 4 +- .../LocusIteratorByStateUnitTest.java | 2 +- .../traversals/TraverseReadsUnitTest.java | 13 +- 95 files changed, 1900 insertions(+), 814 deletions(-) create mode 100644 java/src/net/sf/samtools/GATKBAMFileSpan.java create mode 100644 java/src/net/sf/samtools/GATKBin.java create mode 100644 java/src/net/sf/samtools/GATKBinList.java create mode 100644 java/src/net/sf/samtools/GATKChunk.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/DataSourceGenerationException.java create mode 
100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexContent.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMOverlap.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMFileConstants.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/IntervalSharder.java (84%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/LocusShard.java (65%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/LocusShardStrategy.java (95%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/MonolithicShard.java (78%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/MonolithicShardStrategy.java (94%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/ReadShard.java (88%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/ReadShardStrategy.java (92%) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/ReaderBin.java rename java/src/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => reads}/SAMDataSource.java (97%) rename java/src/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => reads}/SAMReaderID.java (93%) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/ShardStrategy.java (93%) rename java/src/org/broadinstitute/sting/gatk/datasources/{shards => reads}/ShardStrategyFactory.java (97%) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/package-info.java rename java/src/org/broadinstitute/sting/gatk/datasources/{ => reads}/utilities/BAMFileStat.java (99%) 
rename java/src/org/broadinstitute/sting/gatk/datasources/{ => reads}/utilities/BAMTagRenamer.java (98%) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/package-info.java rename java/src/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => reference}/ReferenceDataSource.java (98%) rename java/src/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => reference}/ReferenceDataSourceProgressListener.java (94%) create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/reference/package-info.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/rmd/DataStreamSegment.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/rmd/EntireStream.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/rmd/MappedStreamSegment.java create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java rename java/src/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => rmd}/ReferenceOrderedDataSource.java (59%) mode change 100755 => 100644 rename java/src/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => rmd}/ResourcePool.java (81%) mode change 100755 => 100644 create mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/rmd/package-info.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/shards/package-info.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SimpleDataSource.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/package-info.java rename java/test/org/broadinstitute/sting/gatk/datasources/{shards => reads}/MockLocusShard.java (87%) rename 
java/test/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => reads}/SAMBAMDataSourceUnitTest.java (95%) rename java/test/org/broadinstitute/sting/gatk/datasources/{simpleDataSources => rmd}/ReferenceOrderedDataPoolUnitTest.java (98%) diff --git a/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java index 6f143ed89..8825c3767 100644 --- a/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java +++ b/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java @@ -25,7 +25,7 @@ package net.sf.picard.reference; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener; +import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*; diff --git a/java/src/net/sf/samtools/GATKBAMFileSpan.java b/java/src/net/sf/samtools/GATKBAMFileSpan.java new file mode 100644 index 000000000..d1bf9d1cd --- /dev/null +++ b/java/src/net/sf/samtools/GATKBAMFileSpan.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package net.sf.samtools; + +import java.util.ArrayList; +import java.util.List; + +/** + * A temporary solution to work around Java access rights issues: + * override BAMFileSpan and make it public. + * TODO: Eliminate once we determine the final fate of the BAM index reading code. + */ +public class GATKBAMFileSpan extends BAMFileSpan { + /** + * Create a new empty list of chunks. + */ + public GATKBAMFileSpan() { + super(); + } + + /** + * Convenience constructor to construct a BAM file span from + * a single chunk. + * @param chunk Chunk to use as the sole region in this span. + */ + public GATKBAMFileSpan(final Chunk chunk) { + super(chunk); + } + + /** + * Create a new chunk list from the given list of chunks. + * @param chunks Constituent chunks. 
+ */ + public GATKBAMFileSpan(final List chunks) { + super(new ArrayList(chunks)); + } +} diff --git a/java/src/net/sf/samtools/GATKBin.java b/java/src/net/sf/samtools/GATKBin.java new file mode 100644 index 000000000..95f4000ec --- /dev/null +++ b/java/src/net/sf/samtools/GATKBin.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package net.sf.samtools; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * A temporary solution to work around Java access rights issues: + * override Bin and make it public. + * TODO: Eliminate once we determine the final fate of the BAM index reading code. 
+ */ +public class GATKBin extends Bin { + public GATKBin(final int referenceSequence, final int binNumber) { + super(referenceSequence,binNumber); + } + + public GATKBin(final Bin bin) { + super(bin.getReferenceSequence(),bin.getBinNumber()); + } + + @Override + public int getReferenceSequence() { + return super.getReferenceSequence(); + } + + @Override + public int getBinNumber() { + return super.getBinNumber(); + } + + public List getGATKChunkList() { + List gatkChunks = new ArrayList(); + for(Chunk chunk: getChunkList()) + gatkChunks.add(new GATKChunk(chunk)); + return gatkChunks; + } + + public void setGATKChunkList(List chunks) { + super.setChunkList(new ArrayList(chunks)); + } +} diff --git a/java/src/net/sf/samtools/GATKBinList.java b/java/src/net/sf/samtools/GATKBinList.java new file mode 100644 index 000000000..b53062aaf --- /dev/null +++ b/java/src/net/sf/samtools/GATKBinList.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package net.sf.samtools; + +import java.util.BitSet; + +/** + * A temporary solution to work around Java access rights issues: + * override BinList and make it public. + * TODO: Eliminate once we determine the final fate of the BAM index reading code. + */ +public class GATKBinList extends BinList { + /** + * Create a new BinList over sequenceCount sequences, consisting of the given bins. + * @param referenceSequence Reference sequence to which these bins are relevant. + * @param bins The given bins to include. + */ + public GATKBinList(final int referenceSequence, final BitSet bins) { + super(referenceSequence,bins); + } + + /** + * Retrieves the bins stored in this list. + * @return A bitset where a bin is present in the list if the bit is true. + */ + public BitSet getBins() { + return super.getBins(); + } +} diff --git a/java/src/net/sf/samtools/GATKChunk.java b/java/src/net/sf/samtools/GATKChunk.java new file mode 100644 index 000000000..69ff70555 --- /dev/null +++ b/java/src/net/sf/samtools/GATKChunk.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package net.sf.samtools; + +/** + * A temporary solution to work around Java access rights issues: + * override chunk and make it public. + * TODO: Eliminate once we determine the final fate of the BAM index reading code. + */ +public class GATKChunk extends Chunk { + public GATKChunk(final long start, final long stop) { + super(start,stop); + } + + public GATKChunk(final Chunk chunk) { + super(chunk.getChunkStart(),chunk.getChunkEnd()); + } + + @Override + public GATKChunk clone() { + return new GATKChunk(getChunkStart(),getChunkEnd()); + } + + @Override + public long getChunkStart() { + return super.getChunkStart(); + } + + @Override + public void setChunkStart(final long value) { + super.setChunkStart(value); + } + + @Override + public long getChunkEnd() { + return super.getChunkEnd(); + } + + @Override + public void setChunkEnd(final long value) { + super.setChunkEnd(value); + } + + +} diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index c11d246df..8d4fdd978 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; 
-import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 294d06615..0c8d9198e 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -36,16 +36,16 @@ import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; 
-import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; @@ -66,10 +66,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; -import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; -import java.io.FileNotFoundException; import java.util.*; /** diff --git a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 14b17f265..47af69fa7 100755 --- a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -5,10 +5,9 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.utils.baq.BAQ; -import java.util.List; import java.util.Collection; /** * User: hanna diff --git a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index a75f30173..f73f65e9f 100755 --- 
a/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -27,9 +27,8 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.FilterManager; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.classloader.PluginManager; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/DataSourceGenerationException.java b/java/src/org/broadinstitute/sting/gatk/datasources/DataSourceGenerationException.java deleted file mode 100644 index 7e6cab6c8..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/DataSourceGenerationException.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -/** - * User: aaron - * Date: Mar 26, 2009 - * Time: 9:25:49 AM - *

- * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - */ - -/** - * This exception is throw when we're unable to generate a data source, - * most likely due to an incomplete input source list - */ -public class DataSourceGenerationException extends ReviewedStingException { - public DataSourceGenerationException(String message) { - super(message); - } - - public DataSourceGenerationException(String message, Throwable throwable) { - super(message, throwable); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java index 75d03856b..72b962522 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusShardDataProvider.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.ReadProperties; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index 654f0999f..b4b1d7f8a 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ 
b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index f4adcefb9..142c8a178 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -25,14 +25,12 @@ package org.broadinstitute.sting.gatk.datasources.providers; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.ArrayList; import java.util.Collection; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java 
b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java index 04120d4e5..5a672b09f 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadShardDataProvider.java @@ -1,9 +1,8 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import java.util.Collection; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 8a67a5db7..e2ce6b331 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -26,13 +26,12 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.RODMergingIterator; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import java.util.*; 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java index e6f6c9879..a0ea32f9b 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ShardDataProvider.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexContent.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexContent.java new file mode 100644 index 000000000..48194dafa --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexContent.java @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.GATKBin; +import net.sf.samtools.GATKChunk; +import net.sf.samtools.LinearIndex; + +import java.util.*; + +/** + * Represents the contents of a bam index file for one reference. + * A BAM index (.bai) file contains information for all references in the bam file. + * This class describes the data present in the index file for one of these references; + * including the bins, chunks, and linear index. + */ +class BAMIndexContent { + /** + * The reference sequence for the data currently loaded. + */ + private final int mReferenceSequence; + + /** + * A list of all bins in the above reference sequence. + */ + private final BinList mBinList; + + /** + * The linear index for the reference sequence above. + */ + private final LinearIndex mLinearIndex; + + + /** + * @param referenceSequence Content corresponds to this reference. 
+ * @param bins Array of bins represented by this content, possibly sparse + * @param numberOfBins Number of non-null bins + * @param linearIndex Additional index used to optimize queries + */ + BAMIndexContent(final int referenceSequence, final GATKBin[] bins, final int numberOfBins, final LinearIndex linearIndex) { + this.mReferenceSequence = referenceSequence; + this.mBinList = new BinList(bins, numberOfBins); + this.mLinearIndex = linearIndex; + } + + /** + * Reference for this Content + */ + public int getReferenceSequence() { + return mReferenceSequence; + } + + /** + * Does this content have anything in this bin? + */ + public boolean containsBin(final GATKBin bin) { + return mBinList.getBin(bin.getBinNumber()) != null; + } + + /** + * @return iterable list of bins represented by this content + */ + public BinList getBins() { + return mBinList; + } + + /** + * @return the number of non-null bins represented by this content + */ + int getNumberOfNonNullBins() { + return mBinList.getNumberOfNonNullBins(); + } + + /** + * @return all chunks associated with all bins in this content + */ + public List getAllChunks() { + List allChunks = new ArrayList(); + for (GATKBin b : mBinList) + if (b.getChunkList() != null) { + allChunks.addAll(b.getGATKChunkList()); + } + return Collections.unmodifiableList(allChunks); + } + + /** + * @return the linear index represented by this content + */ + public LinearIndex getLinearIndex() { + return mLinearIndex; + } + + /** + * This class is used to encapsulate the list of Bins stored in the BAMIndexContent. + * While it is currently represented as an array, we may decide to change it to an ArrayList or other structure + */ + class BinList implements Iterable { + + private final GATKBin[] mBinArray; + public final int numberOfNonNullBins; + public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based + + /** + * @param binArray a sparse array representation of the bins.
The index into the array is the bin number. + * @param numberOfNonNullBins + */ + BinList(GATKBin[] binArray, int numberOfNonNullBins) { + this.mBinArray = binArray; + this.numberOfNonNullBins = numberOfNonNullBins; + this.maxBinNumber = mBinArray.length - 1; + } + + GATKBin getBin(int binNumber) { + if (binNumber > maxBinNumber) return null; + return mBinArray[binNumber]; + } + + int getNumberOfNonNullBins() { + return numberOfNonNullBins; + } + + /** + * Gets an iterator over all non-null bins. + * + * @return An iterator over all bins. + */ + public Iterator iterator() { + return new BinIterator(); + } + + private class BinIterator implements Iterator { + /** + * Stores the bin # of the Bin currently in use. + */ + private int nextBin; + + public BinIterator() { + nextBin = 0; + } + + /** + * Are there more bins in this set, waiting to be returned? + * + * @return True if more bins are remaining. + */ + public boolean hasNext() { + while (nextBin <= maxBinNumber) { + if (getBin(nextBin) != null) return true; + nextBin++; + } + return false; + } + + /** + * Gets the next bin in the provided BinList. + * + * @return the next available bin in the BinList. 
+ */ + public GATKBin next() { + if (!hasNext()) + throw new NoSuchElementException("This BinIterator is currently empty"); + GATKBin result = getBin(nextBin); + nextBin++; + return result; + } + + public void remove() { + throw new UnsupportedOperationException("Unable to remove from a bin iterator"); + } + } + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMOverlap.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMOverlap.java new file mode 100644 index 000000000..15a372ca6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMOverlap.java @@ -0,0 +1,29 @@ +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.Bin; + +import java.util.HashMap; +import java.util.Map; + +/** + * Models a bin at which all BAM files in the merged input stream overlap. + */ +class BAMOverlap { + public final int start; + public final int stop; + + private final Map bins = new HashMap(); + + public BAMOverlap(final int start, final int stop) { + this.start = start; + this.stop = stop; + } + + public void addBin(final SAMReaderID id, final Bin bin) { + bins.put(id,bin); + } + + public Bin getBin(final SAMReaderID id) { + return bins.get(id); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java new file mode 100644 index 000000000..69128b272 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons 
to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.SAMFileSpan; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Represents a small section of a BAM file, and every associated interval. + */ +class FilePointer { + protected final Map fileSpans = new HashMap(); + protected final String referenceSequence; + protected final BAMOverlap overlap; + protected final List locations; + + /** + * Does this file pointer point into an unmapped region? 
+ */ + protected final boolean isRegionUnmapped; + + public FilePointer(final GenomeLoc location) { + this.referenceSequence = location.getContig(); + this.overlap = null; + this.locations = Collections.singletonList(location); + this.isRegionUnmapped = GenomeLoc.isUnmapped(location); + } + + public FilePointer(final String referenceSequence,final BAMOverlap overlap) { + this.referenceSequence = referenceSequence; + this.overlap = overlap; + this.locations = new ArrayList(); + this.isRegionUnmapped = false; + } + + public void addLocation(GenomeLoc location) { + locations.add(location); + } + + public void addFileSpans(SAMReaderID id, SAMFileSpan fileSpan) { + this.fileSpans.put(id,fileSpan); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMFileConstants.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMFileConstants.java new file mode 100644 index 000000000..47b56db94 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMFileConstants.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads; + +/** + * Constants used in reading & writing BAM files + */ +class GATKBAMFileConstants { + /** + * The beginning of a BAMRecord is a fixed-size block of 8 int32s + */ + static final int FIXED_BLOCK_SIZE = 8 * 4; + + /** + * Sanity check -- we never expect BAMRecords to be as big as this. + */ + static final int MAXIMUM_RECORD_LENGTH = 1024 * 1024; + + /** + * BAM file magic number. This is what is present in the gunzipped version of the file, + * which never exists on disk. + */ + + static final byte[] BAM_MAGIC = "BAM\1".getBytes(); + /** + * BAM index file magic number. + */ + static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes(); +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java new file mode 100644 index 000000000..bdc544d47 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -0,0 +1,575 @@ +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.BAMIndex; +import net.sf.samtools.BAMIndexMetaData; +import net.sf.samtools.Bin; +import net.sf.samtools.BrowseableBAMIndex; +import net.sf.samtools.GATKBAMFileSpan; +import net.sf.samtools.GATKBin; +import net.sf.samtools.GATKBinList; +import net.sf.samtools.GATKChunk; +import net.sf.samtools.LinearIndex; +import net.sf.samtools.SAMException; +import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.util.RuntimeIOException; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.*; + +/** + * A basic interface for querying BAM indices. + * + * @author mhanna + * @version 0.1 + */ +public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex { + /** + * Reports the total amount of genomic data that any bin can index. + */ + protected static final int BIN_GENOMIC_SPAN = 512*1024*1024; + + /** + * What is the starting bin for each level? + */ + private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681}; + + /** + * Reports the maximum number of bins that can appear in a BAM file. 
+ */ + public static final int MAX_BINS = 37450; // =(8^6-1)/7+1 + + public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1]; + + private final File mFile; + private final MappedByteBuffer mFileBuffer; + + private SAMSequenceDictionary mBamDictionary = null; + + private Integer mLastReferenceRetrieved = null; + private WeakHashMap mQueriesByReference = new WeakHashMap(); + + public GATKBAMIndex(final File file, final SAMSequenceDictionary dictionary) { + mFile = file; + mBamDictionary = dictionary; + // Open the file stream. + try { + FileInputStream fileStream = new FileInputStream(mFile); + FileChannel fileChannel = fileStream.getChannel(); + mFileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size()); + mFileBuffer.order(ByteOrder.LITTLE_ENDIAN); + + fileChannel.close(); + fileStream.close(); + } + catch (IOException exc) { + throw new RuntimeIOException(exc.getMessage(), exc); + } + + // Verify the magic number. + seek(0); + final byte[] buffer = new byte[4]; + readBytes(buffer); + if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) { + throw new RuntimeException("Invalid file header in BAM index " + mFile + + ": " + new String(buffer)); + } + } + + /** + * Get the number of levels employed by this index. + * @return Number of levels in this index. + */ + public static int getNumIndexLevels() { + return LEVEL_STARTS.length; + } + + /** + * Gets the first bin in the given level. + * @param levelNumber Level number. 0-based. + * @return The first bin in this level. + */ + public static int getFirstBinInLevel(final int levelNumber) { + return LEVEL_STARTS[levelNumber]; + } + + /** + * Gets the number of bins in the given level. + * @param levelNumber Level number. 0-based. + * @return The size (number of possible bins) of the given level. 
+ */ + public int getLevelSize(final int levelNumber) { + if(levelNumber == getNumIndexLevels()-1) // deepest level: LEVEL_STARTS has no next entry, so use the same MAX_BINS-1 bound as getFirstLocusInBin/getLastLocusInBin + return MAX_BINS-1-LEVEL_STARTS[levelNumber]; + else + return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber]; + } + + /** + * Gets the level associated with the given bin number. + * @param bin The bin for which to determine the level. + * @return the level associated with the given bin number. + */ + @Override + public int getLevelForBin(final Bin bin) { + GATKBin gatkBin = new GATKBin(bin); + if(gatkBin.getBinNumber() >= MAX_BINS) + throw new SAMException("Tried to get level for invalid bin."); + for(int i = getNumIndexLevels()-1; i >= 0; i--) { + if(gatkBin.getBinNumber() >= LEVEL_STARTS[i]) + return i; + } + throw new SAMException("Unable to find correct bin for bin "+bin); + } + + /** + * Gets the first locus that this bin can index into. + * @param bin The bin to test. + * @return The first position that the given bin can represent. + */ + public int getFirstLocusInBin(final Bin bin) { + final int level = getLevelForBin(bin); + final int levelStart = LEVEL_STARTS[level]; + final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart; + return (new GATKBin(bin).getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1; + } + + /** + * Gets the last locus that this bin can index into. + * @param bin The bin to test. + * @return The last position that the given bin can represent. + */ + @Override + public int getLastLocusInBin(final Bin bin) { + final int level = getLevelForBin(bin); + final int levelStart = LEVEL_STARTS[level]; + final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart; + return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize); + } + + public int getNumberOfReferences() { + seek(4); + return readInteger(); + } + + /** + * Use to get close to the unmapped reads at the end of a BAM file.
+ * @return The file offset of the first record in the last linear bin, or -1 + * if there are no elements in linear bins (i.e. no mapped reads). + */ + public long getStartOfLastLinearBin() { + seek(4); + + final int sequenceCount = readInteger(); + // Because no reads may align to the last sequence in the sequence dictionary, + // grab the last element of the linear index for each sequence, and return + // the last one from the last sequence that has one. + long lastLinearIndexPointer = -1; + for (int i = 0; i < sequenceCount; i++) { + // System.out.println("# Sequence TID: " + i); + final int nBins = readInteger(); + // System.out.println("# nBins: " + nBins); + for (int j1 = 0; j1 < nBins; j1++) { + // Skip bin # + skipBytes(4); + final int nChunks = readInteger(); + // Skip chunks + skipBytes(16 * nChunks); + } + final int nLinearBins = readInteger(); + if (nLinearBins > 0) { + // Skip to last element of list of linear bins + skipBytes(8 * (nLinearBins - 1)); + lastLinearIndexPointer = readLong(); + } + } + + return lastLinearIndexPointer; + } + + /** + * Gets meta data for the given reference including information about number of aligned, unaligned, and noCoordinate records + * @param reference the reference of interest + * @return meta data for the reference + */ + public BAMIndexMetaData getMetaData(int reference) { + throw new UnsupportedOperationException("Cannot retrieve metadata for GATKBAMIndex"); + } + + /** + * Returns count of records unassociated with any reference. 
Call before the index file is closed + * + * @return meta data at the end of the bam index that indicates count of records holding no coordinates + * or null if no meta data (old index format) + */ + public Long getNoCoordinateCount() { + + seek(4); + final int sequenceCount = readInteger(); + + skipToSequence(sequenceCount); + try { // in case of old index file without meta data + return readLong(); + } catch (Exception e) { + return null; + } + } + + /** + * Get list of regions of BAM file that may contain SAMRecords for the given range + * @param referenceIndex sequence of desired SAMRecords + * @param startPos 1-based start of the desired interval, inclusive + * @param endPos 1-based end of the desired interval, inclusive + * @return the virtual file position. Each pair is the first and last virtual file position + * in a range that can be scanned to find SAMRecords that overlap the given positions. + */ + @Override + public GATKBAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) { + BAMIndexContent queryResults = getQueryResults(referenceIndex); + + if(queryResults == null) + return null; + + GATKBinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos); + + // System.out.println("# Sequence target TID: " + referenceIndex); + List bins = new ArrayList(); + for(GATKBin bin: queryResults.getBins()) { + if (overlappingBins.getBins().get(bin.getBinNumber())) + bins.add(bin); + } + + if (bins.isEmpty()) { + return null; + } + + List chunkList = new ArrayList(); + for(GATKBin bin: bins) { + for(GATKChunk chunk: bin.getGATKChunkList()) + chunkList.add(chunk.clone()); + } + + if (chunkList.isEmpty()) { + return null; + } + + chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos)); + return new GATKBAMFileSpan(chunkList); + } + + /** + * Perform an overlapping query of all bins bounding the given location. 
+ * @param bin The bin over which to perform an overlapping query. + * @return The file pointers + */ + @Override + public GATKBAMFileSpan getSpanOverlapping(final Bin bin) { + if(bin == null) + return null; + + GATKBin gatkBin = new GATKBin(bin); + + final int referenceSequence = gatkBin.getReferenceSequence(); + BAMIndexContent indexQuery = getQueryResults(referenceSequence); + + if(indexQuery == null) + return null; + + final int binLevel = getLevelForBin(bin); + final int firstLocusInBin = getFirstLocusInBin(bin); + + // Add the specified bin to the tree if it exists. + List binTree = new ArrayList(); + if(indexQuery.containsBin(gatkBin)) + binTree.add(indexQuery.getBins().getBin(gatkBin.getBinNumber())); + + int currentBinLevel = binLevel; + while(--currentBinLevel >= 0) { + final int binStart = getFirstBinInLevel(currentBinLevel); + final int binWidth = getMaxAddressibleGenomicLocation()/getLevelSize(currentBinLevel); + final int binNumber = firstLocusInBin/binWidth + binStart; + GATKBin parentBin = indexQuery.getBins().getBin(binNumber); + if(parentBin != null && indexQuery.containsBin(parentBin)) + binTree.add(parentBin); + } + + List chunkList = new ArrayList(); + for(GATKBin coveringBin: binTree) { + for(GATKChunk chunk: coveringBin.getGATKChunkList()) + chunkList.add(chunk.clone()); + } + + final int start = getFirstLocusInBin(bin); + chunkList = optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start)); + return new GATKBAMFileSpan(chunkList); + } + + /** + * Get a list of bins in the BAM file that may contain SAMRecords for the given range. + * @param referenceIndex sequence of desired SAMRecords + * @param startPos 1-based start of the desired interval, inclusive + * @param endPos 1-based end of the desired interval, inclusive + * @return a list of bins that contain relevant data. 
+ */ + public GATKBinList getBinsOverlapping(final int referenceIndex, final int startPos, final int endPos) { + final BitSet regionBins = regionToBins(startPos,endPos); + if (regionBins == null) { + return null; + } + return new GATKBinList(referenceIndex,regionBins); + } + + protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) { + seek(4); + + List metaDataChunks = new ArrayList(); + + final int sequenceCount = readInteger(); + + if (referenceSequence >= sequenceCount) { + return null; + } + + final BitSet regionBins = regionToBins(startPos, endPos); + if (regionBins == null) { + return null; + } + + skipToSequence(referenceSequence); + + int binCount = readInteger(); + boolean metaDataSeen = false; + GATKBin[] bins = new GATKBin[getMaxBinNumberForReference(referenceSequence) +1]; + for (int binNumber = 0; binNumber < binCount; binNumber++) { + final int indexBin = readInteger(); + final int nChunks = readInteger(); + List chunks = new ArrayList(nChunks); + // System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks); + GATKChunk lastChunk = null; + if (regionBins.get(indexBin)) { + for (int ci = 0; ci < nChunks; ci++) { + final long chunkBegin = readLong(); + final long chunkEnd = readLong(); + lastChunk = new GATKChunk(chunkBegin, chunkEnd); + chunks.add(lastChunk); + } + } else if (indexBin == MAX_BINS) { + // meta data - build the bin so that the count of bins is correct; + // but don't attach meta chunks to the bin, or normal queries will be off + for (int ci = 0; ci < nChunks; ci++) { + final long chunkBegin = readLong(); + final long chunkEnd = readLong(); + lastChunk = new GATKChunk(chunkBegin, chunkEnd); + metaDataChunks.add(lastChunk); + } + metaDataSeen = true; + continue; // don't create a Bin + } else { + skipBytes(16 * nChunks); + } + GATKBin bin = new GATKBin(referenceSequence, indexBin); + bin.setGATKChunkList(chunks); + bin.setLastChunk(lastChunk); + bins[indexBin] = bin; + } + + 
final int nLinearBins = readInteger(); + + final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos); + final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1; + final int actualStop = Math.min(regionLinearBinStop, nLinearBins -1); + + long[] linearIndexEntries = new long[0]; + if (regionLinearBinStart < nLinearBins) { + linearIndexEntries = new long[actualStop-regionLinearBinStart+1]; + skipBytes(8 * regionLinearBinStart); + for(int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++) + linearIndexEntries[linearBin-regionLinearBinStart] = readLong(); + } + + final LinearIndex linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries); + + return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), linearIndex); + } + + /** + * The maximum bin number for a reference sequence of a given length + */ + static int getMaxBinNumberForSequenceLength(int sequenceLength) { + return getFirstBinInLevel(getNumIndexLevels() - 1) + (sequenceLength >> 14); + // return 4681 + (sequenceLength >> 14); // note 4681 = getFirstBinInLevel(getNumIndexLevels() - 1) + } + + /** + * Looks up the cached BAM query results if they're still in the cache and not expired. Otherwise, + * retrieves the cache results from disk. + * @param referenceIndex The reference to load. CachingBAMFileIndex only stores index data for entire references. + * @return The index information for this reference. + */ + protected BAMIndexContent getQueryResults(final int referenceIndex) { + // WeakHashMap is a bit weird in that its lookups are done via equals() equality, but expirations must be + // handled via == equality. This implementation jumps through a few hoops to make sure that == equality still + // holds even in the context of boxing/unboxing. + + // If this query is for the same reference index as the last query, return it.
+ if(mLastReferenceRetrieved!=null && mLastReferenceRetrieved == referenceIndex) + return mQueriesByReference.get(referenceIndex); + + // If not, check to see whether it's available in the cache. + BAMIndexContent queryResults = mQueriesByReference.get(referenceIndex); + if(queryResults != null) { + mLastReferenceRetrieved = referenceIndex; + mQueriesByReference.put(referenceIndex,queryResults); + return queryResults; + } + + // If not in the cache, attempt to load it from disk. + queryResults = query(referenceIndex,1,-1); + if(queryResults != null) { + mLastReferenceRetrieved = referenceIndex; + mQueriesByReference.put(referenceIndex,queryResults); + return queryResults; + } + + // Not even available on disk. + return null; + } + + /** + * Gets the largest genomic span that this indexing scheme can address. + * @return BIN_GENOMIC_SPAN, the total amount of genomic data that any bin can index. + */ + protected int getMaxAddressibleGenomicLocation() { + return BIN_GENOMIC_SPAN; + } + + /** + * Get candidate bins for the specified region + * @param startPos 1-based start of target region, inclusive. + * @param endPos 1-based end of target region, inclusive. + * @return bit set for each bin that may contain SAMRecords in the target region. + */ + protected BitSet regionToBins(final int startPos, final int endPos) { + final int maxPos = 0x1FFFFFFF; + final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos; + final int end = (endPos <= 0) ?
maxPos : (endPos-1) & maxPos; + if (start > end) { + return null; + } + int k; + final BitSet bitSet = new BitSet(MAX_BINS); + bitSet.set(0); + for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bitSet.set(k); + for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bitSet.set(k); + for (k = 73 + (start>>20); k <= 73 + (end>>20); ++k) bitSet.set(k); + for (k = 585 + (start>>17); k <= 585 + (end>>17); ++k) bitSet.set(k); + for (k = 4681 + (start>>14); k <= 4681 + (end>>14); ++k) bitSet.set(k); + return bitSet; + } + + protected List optimizeChunkList(final List chunks, final long minimumOffset) { + GATKChunk lastChunk = null; + Collections.sort(chunks); + final List result = new ArrayList(); + for (final GATKChunk chunk : chunks) { + if (chunk.getChunkEnd() <= minimumOffset) { + continue; // linear index optimization + } + if (result.isEmpty()) { + result.add(chunk); + lastChunk = chunk; + continue; + } + // Coalesce chunks that are in adjacent file blocks. + // This is a performance optimization. + if (!lastChunk.overlaps(chunk) && !lastChunk.isAdjacentTo(chunk)) { + result.add(chunk); + lastChunk = chunk; + } else { + if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) { + lastChunk.setChunkEnd(chunk.getChunkEnd()); + } + } + } + return result; + } + + /** + * The maximum possible bin number for this reference sequence. 
+ * This is based on the maximum coordinate position of the reference + * which is based on the size of the reference + */ + private int getMaxBinNumberForReference(final int reference) { + try { + final int sequenceLength = mBamDictionary.getSequence(reference).getSequenceLength(); + return getMaxBinNumberForSequenceLength(sequenceLength); + } catch (Exception e) { + return MAX_BINS; + } + } + + private void skipToSequence(final int sequenceIndex) { + for (int i = 0; i < sequenceIndex; i++) { + // System.out.println("# Sequence TID: " + i); + final int nBins = readInteger(); + // System.out.println("# nBins: " + nBins); + for (int j = 0; j < nBins; j++) { + final int bin = readInteger(); + final int nChunks = readInteger(); + // System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks); + skipBytes(16 * nChunks); + } + final int nLinearBins = readInteger(); + // System.out.println("# nLinearBins: " + nLinearBins); + skipBytes(8 * nLinearBins); + } + } + + private void readBytes(final byte[] bytes) { + mFileBuffer.get(bytes); + } + + private int readInteger() { + return mFileBuffer.getInt(); + } + + private long readLong() { + return mFileBuffer.getLong(); + } + + private void skipBytes(final int count) { + mFileBuffer.position(mFileBuffer.position() + count); + } + + private void seek(final int position) { + mFileBuffer.position(position); + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java similarity index 84% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java index a85a6f3a4..0732b0de9 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java @@ -1,5 +1,5 @@ /* - * Copyright 
(c) 2010 The Broad Institute + * Copyright (c) 2011, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,31 +12,40 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.apache.log4j.Logger; - -import java.util.*; - -import net.sf.samtools.*; import net.sf.picard.util.PeekableIterator; +import net.sf.samtools.AbstractBAMFileIndex; +import net.sf.samtools.Bin; +import net.sf.samtools.BrowseableBAMIndex; +import net.sf.samtools.SAMSequenceRecord; +import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import 
java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.PriorityQueue; +import java.util.Queue; + /** * Shard intervals based on position within the BAM file. * @@ -47,7 +56,7 @@ public class IntervalSharder { private static Logger logger = Logger.getLogger(IntervalSharder.class); public static Iterator shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) { - return new FilePointerIterator(dataSource,loci); + return new IntervalSharder.FilePointerIterator(dataSource,loci); } /** @@ -117,7 +126,7 @@ public class IntervalSharder { for(SAMReaderID id: dataSource.getReaderIDs()) filePointer.addFileSpans(id,null); return Collections.singletonList(filePointer); - } + } // Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin. List filePointers = new ArrayList(); @@ -125,7 +134,7 @@ public class IntervalSharder { BAMOverlap lastBAMOverlap = null; Map readerToIndexMap = new HashMap(); - BinMergingIterator binMerger = new BinMergingIterator(); + IntervalSharder.BinMergingIterator binMerger = new IntervalSharder.BinMergingIterator(); for(SAMReaderID id: dataSource.getReaderIDs()) { final SAMSequenceRecord referenceSequence = dataSource.getHeader(id).getSequence(contig); // If this contig can't be found in the reference, skip over it. @@ -153,7 +162,7 @@ public class IntervalSharder { final int locationStop = (int)location.getStop(); // Advance to first bin. - while(binIterator.peek().stop < locationStart) + while(binIterator.peek().stop < locationStart) binIterator.next(); // Add all relevant bins to a list. 
If the given bin extends beyond the end of the current interval, make @@ -230,7 +239,7 @@ public class IntervalSharder { for(FilePointer filePointer: filePointers) filePointer.addFileSpans(id,index.getSpanOverlapping(filePointer.overlap.getBin(id))); } - + return filePointers; } @@ -239,7 +248,7 @@ public class IntervalSharder { private Queue pendingOverlaps = new LinkedList(); public void addReader(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, Iterator bins) { - binQueue.add(new BinQueueState(id,index,referenceSequence,new LowestLevelBinFilteringIterator(index,bins))); + binQueue.add(new BinQueueState(id,index,referenceSequence,new IntervalSharder.LowestLevelBinFilteringIterator(index,bins))); } public boolean hasNext() { @@ -369,89 +378,7 @@ public class IntervalSharder { } } -/** - * Represents a small section of a BAM file, and every associated interval. - */ -class FilePointer { - protected final Map fileSpans = new HashMap(); - protected final String referenceSequence; - protected final BAMOverlap overlap; - protected final List locations; - - /** - * Does this file pointer point into an unmapped region? - */ - protected final boolean isRegionUnmapped; - - public FilePointer(final GenomeLoc location) { - this.referenceSequence = location.getContig(); - this.overlap = null; - this.locations = Collections.singletonList(location); - this.isRegionUnmapped = GenomeLoc.isUnmapped(location); - } - - public FilePointer(final String referenceSequence,final BAMOverlap overlap) { - this.referenceSequence = referenceSequence; - this.overlap = overlap; - this.locations = new ArrayList(); - this.isRegionUnmapped = false; - } - - public void addLocation(GenomeLoc location) { - locations.add(location); - } - - public void addFileSpans(SAMReaderID id, SAMFileSpan fileSpan) { - this.fileSpans.put(id,fileSpan); - } -} - -/** - * Models a bin at which all BAM files in the merged input stream overlap. 
- */ -class BAMOverlap { - public final int start; - public final int stop; - - private final Map bins = new HashMap(); - - public BAMOverlap(final int start, final int stop) { - this.start = start; - this.stop = stop; - } - - public void addBin(final SAMReaderID id, final Bin bin) { - bins.put(id,bin); - } - - public Bin getBin(final SAMReaderID id) { - return bins.get(id); - } -} - -class ReaderBin { - public final SAMReaderID id; - public final BrowseableBAMIndex index; - public final int referenceSequence; - public final Bin bin; - - public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) { - this.id = id; - this.index = index; - this.referenceSequence = referenceSequence; - this.bin = bin; - } - - public int getStart() { - return index.getFirstLocusInBin(bin); - } - - public int getStop() { - return index.getLastLocusInBin(bin); - } -} - -class BinQueueState implements Comparable { +class BinQueueState implements Comparable { private final SAMReaderID id; private final BrowseableBAMIndex index; private final int referenceSequence; @@ -494,7 +421,7 @@ class BinQueueState implements Comparable { return nextBin; } - public int compareTo(BinQueueState other) { + public int compareTo(org.broadinstitute.sting.gatk.datasources.reads.BinQueueState other) { if(!this.bins.hasNext() && !other.bins.hasNext()) return 0; if(!this.bins.hasNext()) return -1; if(!this.bins.hasNext()) return 1; @@ -522,8 +449,4 @@ class BinQueueState implements Comparable { lastLocusInCurrentBin = index.getLastLocusInBin(bin); } } -} - - - - +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java similarity index 65% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java index 
dc38e252f..26af890b4 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShard.java @@ -1,20 +1,13 @@ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.ReadProperties; import java.util.List; import java.util.Map; import net.sf.samtools.SAMFileSpan; -import net.sf.samtools.SAMRecord; -import net.sf.picard.filter.SamRecordFilter; /** * Handles locus shards of BAM information. @@ -22,7 +15,7 @@ import net.sf.picard.filter.SamRecordFilter; * @version 1.0 * @date Apr 7, 2009 */ -public class LocusShard extends BAMFormatAwareShard { +public class LocusShard extends Shard { /** * Create a new locus shard, divided by index. * @param intervals List of intervals to process. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java similarity index 95% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java index b87b351b0..1309c0c18 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/LocusShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LocusShardStrategy.java @@ -22,17 +22,13 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.gatk.ReadProperties; import java.util.*; -import java.io.File; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java similarity index 78% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java index 6b24290c4..7579c22f6 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShard.java @@ -1,11 +1,8 @@ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import java.util.List; @@ -14,7 +11,7 @@ import java.util.List; * @author mhanna * @version 0.1 */ -public class MonolithicShard extends BAMFormatAwareShard { +public class MonolithicShard extends Shard { /** * Creates a new monolithic shard of the given type. * @param shardType Type of the shard. 
Must be either read or locus; cannot be intervalic. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java similarity index 94% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java index d9df841e4..10228ecd7 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/MonolithicShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/MonolithicShardStrategy.java @@ -1,7 +1,6 @@ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Iterator; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java similarity index 88% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java index 8c43f38da..9aecd7779 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShard.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShard.java @@ -1,18 +1,13 @@ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter; -import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import java.util.*; import net.sf.samtools.SAMFileSpan; import net.sf.samtools.SAMRecord; -import net.sf.picard.filter.SamRecordFilter; import org.broadinstitute.sting.utils.GenomeLocParser; /** @@ -37,7 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; * @author mhanna * @version 0.1 */ -public class ReadShard extends BAMFormatAwareShard { +public class ReadShard extends Shard { /** * The reads making up this shard. */ diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java similarity index 92% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java index 6c229901e..456293134 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ReadShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java @@ -22,15 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMFileSpan; -import net.sf.picard.filter.SamRecordFilter; import java.util.*; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; @@ -159,7 +156,7 @@ public class ReadShardStrategy implements ShardStrategy { } if(selectedReaders.size() > 0) { - BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped); + Shard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped); dataSource.fillShard(shard); if(!shard.isBufferEmpty()) { @@ -175,7 +172,7 @@ public class ReadShardStrategy implements ShardStrategy { else { // todo -- this nulling of intervals is a bit annoying since readwalkers without // todo -- any -L values need to be special cased throughout the code. - BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false); + Shard shard = new ReadShard(parser,dataSource,position,null,false); dataSource.fillShard(shard); nextShard = !shard.isBufferEmpty() ? shard : null; } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReaderBin.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReaderBin.java new file mode 100644 index 000000000..c76c1d8ae --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReaderBin.java @@ -0,0 +1,33 @@ +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.Bin; +import net.sf.samtools.BrowseableBAMIndex; + +/** + * Created by IntelliJ IDEA. 
+ * User: mhanna + * Date: Feb 2, 2011 + * Time: 4:36:40 PM + * To change this template use File | Settings | File Templates. + */ +class ReaderBin { + public final SAMReaderID id; + public final BrowseableBAMIndex index; + public final int referenceSequence; + public final Bin bin; + + public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) { + this.id = id; + this.index = index; + this.referenceSequence = referenceSequence; + this.bin = bin; + } + + public int getStart() { + return index.getFirstLocusInBin(bin); + } + + public int getStop() { + return index.getLastLocusInBin(bin); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java similarity index 97% rename from java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 95a8b6229..7d43cb3a4 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; @@ -33,10 +33,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard; -import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; -import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadMetrics; @@ -59,7 +55,7 @@ import java.util.*; *

* Converts shards to SAM iterators over the specified region */ -public class SAMDataSource implements SimpleDataSource { +public class SAMDataSource { /** Backing support for reads. */ protected final ReadProperties readProperties; @@ -423,7 +419,7 @@ public class SAMDataSource implements SimpleDataSource { * @param shard Shard to fill. * @return true if at the end of the stream. False otherwise. */ - public void fillShard(BAMFormatAwareShard shard) { + public void fillShard(Shard shard) { if(!shard.buffersReads()) throw new ReviewedStingException("Attempting to fill a non-buffering shard."); @@ -456,16 +452,12 @@ public class SAMDataSource implements SimpleDataSource { if(shard instanceof MonolithicShard) return seekMonolithic(shard); - if(!(shard instanceof BAMFormatAwareShard)) - throw new ReviewedStingException("BlockDrivenSAMDataSource cannot operate on shards of type: " + shard.getClass()); - BAMFormatAwareShard bamAwareShard = (BAMFormatAwareShard)shard; - - if(bamAwareShard.buffersReads()) { - return bamAwareShard.iterator(); + if(shard.buffersReads()) { + return shard.iterator(); } else { SAMReaders readers = resourcePool.getAvailableReaders(); - return getIterator(readers,bamAwareShard,shard instanceof ReadShard); + return getIterator(readers,shard,shard instanceof ReadShard); } } @@ -500,7 +492,7 @@ public class SAMDataSource implements SimpleDataSource { * TODO: Collapse this flag when the two sharding systems are merged. * @return An iterator over the selected data. */ - private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) { + private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) { SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); // Set up merging to dynamically merge together multiple BAMs. 
@@ -566,7 +558,7 @@ public class SAMDataSource implements SimpleDataSource { * @param id The id of the given reader. * @param read The read to add to the shard. */ - private void addReadToBufferingShard(BAMFormatAwareShard shard,SAMReaderID id,SAMRecord read) { + private void addReadToBufferingShard(Shard shard,SAMReaderID id,SAMRecord read) { SAMFileSpan endChunk = read.getFileSource().getFilePointer().getContentsFollowing(); shard.addRead(read); readerPositions.put(id,endChunk); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java similarity index 93% rename from java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java index 636e9b8e8..5ad3e208f 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMReaderID.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java @@ -1,10 +1,8 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +package org.broadinstitute.sting.gatk.datasources.reads; import org.broadinstitute.sting.commandline.Tags; import java.io.File; -import java.util.List; -import java.util.Collections; /** * Uniquely identifies a SAM file reader. 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java new file mode 100644 index 000000000..418f5d3ee --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/Shard.java @@ -0,0 +1,215 @@ +package org.broadinstitute.sting.gatk.datasources.reads; + +import net.sf.samtools.SAMFileSpan; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.gatk.ReadMetrics; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.Serializable; +import java.util.Collections; +import java.util.List; +import java.util.Map; +/** + * + * User: aaron + * Date: Apr 10, 2009 + * Time: 5:00:27 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + +/** + * @author aaron + * @version 1.0 + * @date Apr 10, 2009 + *

+ * Class Shard + *

+ * The base abstract class for shards. + */ +public abstract class Shard implements HasGenomeLocation { + public enum ShardType { + READ, LOCUS + } + + protected final GenomeLocParser parser; // incredibly annoying! + + /** + * What type of shard is this? Read or locus? + */ + protected final ShardType shardType; + + /** + * Locations. + */ + protected final List locs; + + /** + * Whether the current location is unmapped. + */ + private final boolean isUnmapped; + + /** + * Reads data, if applicable. + */ + private final SAMDataSource readsDataSource; + + /** + * The data backing the next chunks to deliver to the traversal engine. + */ + private final Map fileSpans; + + + + /** + * Statistics about which reads in this shard were used and which were filtered away. + */ + protected final ReadMetrics readMetrics = new ReadMetrics(); + + /** + * Whether this shard points to an unmapped region. + * Some shard types conceptually cannot be unmapped (e.g. LocusShards). In + * this case, isUnmapped should always return false. + * @return True if this shard is unmapped. False otherwise. + */ + public boolean isUnmapped() { + return isUnmapped; + } + + public Shard(GenomeLocParser parser, + ShardType shardType, + List locs, + SAMDataSource readsDataSource, + Map fileSpans, + boolean isUnmapped) { + this.locs = locs; + this.parser = parser; + this.shardType = shardType; + this.readsDataSource = readsDataSource; + this.fileSpans = fileSpans; + this.isUnmapped = isUnmapped; + } + + /** + * If isUnmapped is true, then getGenomeLocs by + * definition will return a singleton list with a GenomeLoc.UNMAPPED + * + * Can return null, indicating that the entire genome is covered. + * + * @return the genome location represented by this shard + */ + public List getGenomeLocs() { + return locs; + } + + /** + * Get the list of chunks delimiting this shard. + * @return a list of chunks that contain data for this shard.
+ */ + public Map getFileSpans() { + return Collections.unmodifiableMap(fileSpans); + } + + /** + * Returns the span of the genomeLocs comprising this shard + * @param + * @return + */ + public GenomeLoc getLocation() { + if ( getGenomeLocs() == null ) + return GenomeLoc.WHOLE_GENOME; + + int start = Integer.MAX_VALUE; + int stop = Integer.MIN_VALUE; + String contig = null; + + for ( GenomeLoc loc : getGenomeLocs() ) { + if ( GenomeLoc.isUnmapped(loc) ) + // special case the unmapped region marker, just abort out + return loc; + contig = loc.getContig(); + if ( loc.getStart() < start ) start = loc.getStart(); + if ( loc.getStop() > stop ) stop = loc.getStop(); + } + + return parser.createGenomeLoc(contig, start, stop); + } + + + /** + * what kind of shard do we return + * @return ShardType, indicating the type + */ + public ShardType getShardType() { + return shardType; + } + + /** + * Does any releasing / aggregation required when the shard is through being processed. + */ + public void close() { + readsDataSource.incorporateReadMetrics(readMetrics); + } + + /** + * Gets key read validation and filtering properties. + * @return set of read properties associated with this shard. + */ + public ReadProperties getReadProperties() { + return readsDataSource.getReadsInfo(); + } + + /** + * Gets the runtime metrics associated with this shard. + * Retrieves a storage space of metrics about number of reads included, filtered, etc. + * @return Storage space for metrics. + */ + public ReadMetrics getReadMetrics() { + return readMetrics; + } + + /** + * Returns true if this shard is meant to buffer reads, rather + * than just holding pointers to their locations. + * @return True if this shard can buffer reads. False otherwise. + */ + public boolean buffersReads() { return false; } + + /** + * Returns true if the read buffer is currently empty. + * @return True if this shard's buffer is empty (and the shard can buffer reads).
+ */ + public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); } + + /** + * Returns true if the read buffer is currently full. + * @return True if this shard's buffer is full (and the shard can buffer reads). + */ + public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); } + + /** + * Adds a read to the read buffer. + * @param read Add a read to the internal shard buffer. + */ + public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); } + + /** + * Gets the iterator over the elements cached in the shard. + * @return + */ + public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategy.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategy.java similarity index 93% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategy.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategy.java index c454a6aca..989cf9fce 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategy.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategy.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import java.util.Iterator; /** diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java similarity index 97% rename from java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java index 6777e69ee..fa733ce12 100644 --- 
a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/ShardStrategyFactory.java @@ -1,11 +1,10 @@ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.SAMSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; /** * diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/package-info.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/package-info.java new file mode 100644 index 000000000..40a616a33 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/package-info.java @@ -0,0 +1,4 @@ +/** + * Efficiently queries BAM read storage files by genomic location. + */ +package org.broadinstitute.sting.gatk.datasources.reads; \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/utilities/BAMFileStat.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java similarity index 99% rename from java/src/org/broadinstitute/sting/gatk/datasources/utilities/BAMFileStat.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java index 327c747f3..ae0f1cf43 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/utilities/BAMFileStat.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMFileStat.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.datasources.utilities; +package org.broadinstitute.sting.gatk.datasources.reads.utilities; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/utilities/BAMTagRenamer.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java similarity index 98% rename from java/src/org/broadinstitute/sting/gatk/datasources/utilities/BAMTagRenamer.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java index 722466cae..f03e2a44f 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/utilities/BAMTagRenamer.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/BAMTagRenamer.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.datasources.utilities; +package org.broadinstitute.sting.gatk.datasources.reads.utilities; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/package-info.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/package-info.java new file mode 100644 index 000000000..4dd3c7601 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/utilities/package-info.java @@ -0,0 +1,4 @@ +/** + * Standalone utilities for working with BAM files. 
+ */ +package org.broadinstitute.sting.gatk.datasources.reads.utilities; \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java similarity index 98% rename from java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index c5e3ea98d..32fc3c79b 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -23,14 +23,13 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +package org.broadinstitute.sting.gatk.datasources.reference; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import net.sf.picard.reference.FastaSequenceIndexBuilder; import net.sf.picard.sam.CreateSequenceDictionary; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.FastaSequenceIndex; -import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSourceProgressListener.java b/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java similarity index 94% rename from java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSourceProgressListener.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java index 
b22f139a6..8dace8fe4 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceDataSourceProgressListener.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +package org.broadinstitute.sting.gatk.datasources.reference; public interface ReferenceDataSourceProgressListener { public void percentProgress(int percent); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reference/package-info.java b/java/src/org/broadinstitute/sting/gatk/datasources/reference/package-info.java new file mode 100644 index 000000000..dc95e89ef --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reference/package-info.java @@ -0,0 +1,4 @@ +/** + * Efficiently queries FASTA reference sequence files by genomic location. + */ +package org.broadinstitute.sting.gatk.datasources.reference; \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/rmd/DataStreamSegment.java b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/DataStreamSegment.java new file mode 100644 index 000000000..4fd0c7267 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/DataStreamSegment.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in 
all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.rmd; + +/** + * Marker interface that represents an arbitrary consecutive segment within a data stream. + */ +interface DataStreamSegment { +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/rmd/EntireStream.java b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/EntireStream.java new file mode 100644 index 000000000..d5b887295 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/EntireStream.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.rmd; + +/** + * Models the entire stream of data. + */ +class EntireStream implements DataStreamSegment { +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/rmd/MappedStreamSegment.java b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/MappedStreamSegment.java new file mode 100644 index 000000000..b110f7311 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/MappedStreamSegment.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.datasources.rmd; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; + +/** + * Models a mapped position within a stream of GATK input data. + */ +class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation { + public final GenomeLoc locus; + + /** + * Retrieves the first location covered by a mapped stream segment. + * @return Location of the first base in this segment. + */ + public GenomeLoc getLocation() { + return locus; + } + + public MappedStreamSegment(GenomeLoc locus) { + this.locus = locus; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java new file mode 100644 index 000000000..abd5929eb --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.rmd; + +import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.List; + +/** + * A pool of reference-ordered data iterators. + */ +class ReferenceOrderedDataPool extends ResourcePool { + // the reference-ordered data itself. + private final RMDTriplet fileDescriptor; + + // our tribble track builder + private final RMDTrackBuilder builder; + + /** + * The header from this RMD, if present. + */ + private final Object header; + + /** + * The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's. 
+ */ + private final SAMSequenceDictionary sequenceDictionary; + + boolean flashbackData = false; + public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser,boolean flashbackData) { + super(sequenceDictionary,genomeLocParser); + this.fileDescriptor = fileDescriptor; + this.builder = builder; + this.flashbackData = flashbackData; + + // prepopulate one RMDTrack + LocationAwareSeekableRODIterator iterator = createNewResource(); + this.addNewResource(iterator); + + // Pull the proper header and sequence dictionary from the prepopulated track. + this.header = iterator.getHeader(); + this.sequenceDictionary = iterator.getSequenceDictionary(); + } + + /** + * Gets the header used by this resource pool. + * @return Header used by this resource pool. + */ + public Object getHeader() { + return header; + } + + /** + * Gets the sequence dictionary built into the ROD index file. + * @return Sequence dictionary from the index file. + */ + public SAMSequenceDictionary getSequenceDictionary() { + return sequenceDictionary; + } + + /** + * Create a new iterator from the existing reference-ordered data. This new iterator is expected + * to be completely independent of any other iterator. + * @return The newly created resource. + */ + public LocationAwareSeekableRODIterator createNewResource() { + if(numIterators() > 0) + throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface"); + RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); + LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator()); + return (flashbackData) ? new FlashBackIterator(iter) : iter; + } + + /** + * Finds the best existing ROD iterator from the pool. 
In this case, the best existing ROD is defined as + * the first one encountered that is at or before the given position. + * @param segment {@inheritDoc} + * @param resources {@inheritDoc} + * @return {@inheritDoc} + */ + public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List resources ) { + if(segment instanceof MappedStreamSegment) { + GenomeLoc position = ((MappedStreamSegment)segment).getLocation(); + + for( LocationAwareSeekableRODIterator RODIterator : resources ) { + + if( (RODIterator.position() == null && RODIterator.hasNext()) || + (RODIterator.position() != null && RODIterator.position().isBefore(position)) ) + return RODIterator; + if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) { + ((FlashBackIterator)RODIterator).flashBackTo(position); + return RODIterator; + } + + } + return null; + } + else if(segment instanceof EntireStream) { + // Asking for a segment over the entire stream, so by definition, there is no best existing resource. + // Force the system to create a new one. + return null; + } + else { + throw new ReviewedStingException("Unable to find a ROD iterator for segments of type " + segment.getClass()); + } + } + + /** + * In this case, the iterator is the resource. Pass it through.
+ */ + public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) { + return resource; + } + + /** + * kill the buffers in the iterator + */ + public void closeResource( LocationAwareSeekableRODIterator resource ) { + if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close(); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java old mode 100755 new mode 100644 similarity index 59% rename from java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java rename to java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java index 36842124c..4680c003a --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java @@ -1,42 +1,49 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.rmd; import net.sf.samtools.SAMSequenceDictionary; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.GenomeLoc; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.lang.reflect.Type; -import java.util.Iterator; import java.util.List; -import java.util.Scanner; -/** - * User: hanna - * Date: May 21, 2009 - * Time: 10:04:12 AM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. 
- * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ /** * A data source which provides a single type of reference-ordered data. */ -public class ReferenceOrderedDataSource implements SimpleDataSource { +public class ReferenceOrderedDataSource { /** * The reference-ordered data itself. */ @@ -102,7 +109,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { } public Class getType() { - return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); } public Class getRecordType() { @@ -114,7 +121,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { } public Object getHeader() { - return header; + return header; } /** @@ -170,120 +177,6 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { } -/** - * A pool of reference-ordered data iterators. - */ -class ReferenceOrderedDataPool extends ResourcePool { - // the reference-ordered data itself. - private final RMDTriplet fileDescriptor; - - // our tribble track builder - private final RMDTrackBuilder builder; - - /** - * The header from this RMD, if present. - */ - private final Object header; - - /** - * The sequence dictionary from this ROD. If no sequence dictionary is present, this dictionary will be the same as the reference's. 
- */ - private final SAMSequenceDictionary sequenceDictionary; - - boolean flashbackData = false; - public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) { - super(sequenceDictionary,genomeLocParser); - this.fileDescriptor = fileDescriptor; - this.builder = builder; - this.flashbackData = flashbackData; - - // prepopulate one RMDTrack - LocationAwareSeekableRODIterator iterator = createNewResource(); - this.addNewResource(iterator); - - // Pull the proper header and sequence dictionary from the prepopulated track. - this.header = iterator.getHeader(); - this.sequenceDictionary = iterator.getSequenceDictionary(); - } - - /** - * Gets the header used by this resource pool. - * @return Header used by this resource pool. - */ - public Object getHeader() { - return header; - } - - /** - * Gets the sequence dictionary built into the ROD index file. - * @return Sequence dictionary from the index file. - */ - public SAMSequenceDictionary getSequenceDictionary() { - return sequenceDictionary; - } - - /** - * Create a new iterator from the existing reference-ordered data. This new iterator is expected - * to be completely independent of any other iterator. - * @return The newly created resource. - */ - public LocationAwareSeekableRODIterator createNewResource() { - if(numIterators() > 0) - throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface"); - RMDTrack track = builder.createInstanceOfTrack(fileDescriptor); - LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator()); - return (flashbackData) ? new FlashBackIterator(iter) : iter; - } - - /** - * Finds the best existing ROD iterator from the pool. 
In this case, the best existing ROD is defined as - * the first one encountered that is at or before the given position. - * @param segment @{inheritedDoc} - * @param resources @{inheritedDoc} - * @return @{inheritedDoc} - */ - public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List resources ) { - if(segment instanceof MappedStreamSegment) { - GenomeLoc position = ((MappedStreamSegment)segment).getLocation(); - - for( LocationAwareSeekableRODIterator RODIterator : resources ) { - - if( (RODIterator.position() == null && RODIterator.hasNext()) || - (RODIterator.position() != null && RODIterator.position().isBefore(position)) ) - return RODIterator; - if (RODIterator.position() != null && RODIterator instanceof FlashBackIterator && ((FlashBackIterator)RODIterator).canFlashBackTo(position)) { - ((FlashBackIterator)RODIterator).flashBackTo(position); - return RODIterator; - } - - } - return null; - } - else if(segment instanceof EntireStream) { - // Asking for a segment over the entire stream, so by definition, there is no best existing resource. - // Force the system to create a new one. - return null; - } - else { - throw new ReviewedStingException("Unable to find a ROD iterator for segments of type " + segment.getClass()); - } - } - - /** - * In this case, the iterator is the resource. Pass it through. - */ - public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) { - return resource; - } - - /** - * kill the buffers in the iterator - */ - public void closeResource( LocationAwareSeekableRODIterator resource ) { - if (resource instanceof FlashBackIterator) ((FlashBackIterator)resource).close(); - } -} - /** * a data pool for the new query based RODs */ @@ -356,6 +249,4 @@ class ReferenceOrderedQueryDataPool extends ResourcePool { * Builder/parser for GenomeLocs. 
*/ protected final GenomeLocParser genomeLocParser; - + /** * All iterators of this reference-ordered data. */ @@ -89,7 +99,7 @@ abstract class ResourcePool { // Make a note of this assignment for proper releasing later. resourceAssignments.put(iterator, selectedResource); } - + return iterator; } @@ -133,7 +143,7 @@ abstract class ResourcePool { protected abstract T createNewResource(); /** - * Find the most appropriate resource to acquire the specified data. + * Find the most appropriate resource to acquire the specified data. * @param segment The data over which the resource is required. * @param availableResources A list of candidate resources to evaluate. * @return The best choice of the availableResources, or null if no resource meets the criteria. @@ -175,33 +185,3 @@ abstract class ResourcePool { } -/** - * Marker interface that represents an arbitrary consecutive segment within a data stream. - */ -interface DataStreamSegment { -} - -/** - * Models the entire stream of data. - */ -class EntireStream implements DataStreamSegment { -} - -/** - * Models a mapped position within a stream of GATK input data. - */ -class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation { - public final GenomeLoc locus; - - /** - * Retrieves the first location covered by a mapped stream segment. - * @return Location of the first base in this segment. - */ - public GenomeLoc getLocation() { - return locus; - } - - public MappedStreamSegment(GenomeLoc locus) { - this.locus = locus; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/rmd/package-info.java b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/package-info.java new file mode 100644 index 000000000..c1cfccf5e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/rmd/package-info.java @@ -0,0 +1,4 @@ +/** + * Efficiently queries arbitrary files sorted according to reference coordinate order. 
+ */ +package org.broadinstitute.sting.gatk.datasources.rmd; \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java deleted file mode 100644 index 624f7e949..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/BAMFormatAwareShard.java +++ /dev/null @@ -1,117 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import net.sf.samtools.*; -import net.sf.picard.filter.SamRecordFilter; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -/** - * A common interface for shards that natively understand the BAM format. - * - * @author mhanna - * @version 0.1 - */ -public abstract class BAMFormatAwareShard extends Shard { - /** - * Whether the current location is unmapped. - */ - private final boolean isUnmapped; - - /** - * Reads data, if applicable. - */ - private final SAMDataSource readsDataSource; - - /** - * The data backing the next chunks to deliver to the traversal engine. - */ - private final Map fileSpans; - - public BAMFormatAwareShard(GenomeLocParser parser, - ShardType shardType, - List locs, - SAMDataSource readsDataSource, - Map fileSpans, - boolean isUnmapped) { - super(parser, shardType, locs); - this.readsDataSource = readsDataSource; - this.fileSpans = fileSpans; - this.isUnmapped = isUnmapped; - } - - /** - * Closes the shard, tallying and incorporating read data. 
- */ - @Override - public void close() { - readsDataSource.incorporateReadMetrics(readMetrics); - } - - /** - * Get the list of chunks delimiting this shard. - * @return a list of chunks that contain data for this shard. - */ - public Map getFileSpans() { - return Collections.unmodifiableMap(fileSpans); - } - - /** - * Gets key read validation and filtering properties. - * @return set of read properties associated with this shard. - */ - @Override - public ReadProperties getReadProperties() { - return readsDataSource.getReadsInfo(); - } - - /** - * Returns true if this shard is meant to buffer reads, rather - * than just holding pointers to their locations. - * @return True if this shard can buffer reads. False otherwise. - */ - public boolean buffersReads() { return false; } - - /** - * Returns true if the read buffer is currently full. - * @return True if this shard's buffer is full (and the shard can buffer reads). - */ - public boolean isBufferEmpty() { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * Returns true if the read buffer is currently full. - * @return True if this shard's buffer is full (and the shard can buffer reads). - */ - public boolean isBufferFull() { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * Adds a read to the read buffer. - * @param read Add a read to the internal shard buffer. - */ - public void addRead(SAMRecord read) { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - /** - * Gets the iterator over the elements cached in the shard. - * @return - */ - public StingSAMIterator iterator() { throw new UnsupportedOperationException("This shard does not buffer reads."); } - - - /** - * Whether this shard points to an unmapped region. - * Some shard types conceptually be unmapped (e.g. LocusShards). In - * this case, isUnmapped should always return false. - * @return True if this shard is unmapped. False otherwise. 
- */ - public boolean isUnmapped() { - return isUnmapped; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java deleted file mode 100644 index c4928103a..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/Shard.java +++ /dev/null @@ -1,132 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.shards; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.HasGenomeLocation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - -import java.io.Serializable; -import java.util.List; -/** - * - * User: aaron - * Date: Apr 10, 2009 - * Time: 5:00:27 PM - * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - -/** - * @author aaron - * @version 1.0 - * @date Apr 10, 2009 - *

- * Interface Shard - *

- * The base abstract class for shards. - */ -public abstract class Shard implements HasGenomeLocation { - public enum ShardType { - READ, LOCUS - } - - protected final GenomeLocParser parser; // incredibly annoying! - - /** - * What type of MonolithicShard is this? Read or locus? - */ - protected final ShardType shardType; - - /** - * Locations. For the monolithic shard, should be a list of all available contigs in the reference. - */ - protected final List locs; - - /** - * Statistics about which reads in this shards were used and which were filtered away. - */ - protected final ReadMetrics readMetrics = new ReadMetrics(); - - public Shard(GenomeLocParser parser, ShardType shardType, List locs) { - this.locs = locs; - this.parser = parser; - this.shardType = shardType; - } - - /** - * If isUnmapped is true, than getGenomeLocs by - * definition will return a singleton list with a GenomeLoc.UNMAPPED - * - * Can return null, indicating that the entire genome is covered. - * - * @return the genome location represented by this shard - */ - public List getGenomeLocs() { - return locs; - } - - /** - * Returns the span of the genomeLocs comprising this shard - * @param - * @return - */ - public GenomeLoc getLocation() { - if ( getGenomeLocs() == null ) - return GenomeLoc.WHOLE_GENOME; - - int start = Integer.MAX_VALUE; - int stop = Integer.MIN_VALUE; - String contig = null; - - for ( GenomeLoc loc : getGenomeLocs() ) { - if ( GenomeLoc.isUnmapped(loc) ) - // special case the unmapped region marker, just abort out - return loc; - contig = loc.getContig(); - if ( loc.getStart() < start ) start = loc.getStart(); - if ( loc.getStop() > stop ) stop = loc.getStop(); - } - - return parser.createGenomeLoc(contig, start, stop); - } - - - /** - * what kind of shard do we return - * @return ShardType, indicating the type - */ - public ShardType getShardType() { - return shardType; - } - - /** - * Does any releasing / aggregation required when the shard is through being processed. 
- */ - public void close() { - ; // by default don't do anything - } - - /** - * Gets required configuration for validating and filtering reads. - * @return read configuration properties. - */ - public abstract ReadProperties getReadProperties(); - - /** - * Gets the runtime metrics associated with this shard. - * Retrieves a storage space of metrics about number of reads included, filtered, etc. - * @return Storage space for metrics. - */ - public ReadMetrics getReadMetrics() { - return readMetrics; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/shards/package-info.java b/java/src/org/broadinstitute/sting/gatk/datasources/shards/package-info.java deleted file mode 100644 index 7d209f9c6..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/package-info.java +++ /dev/null @@ -1,6 +0,0 @@ -/** - * Divides core input data into manageable blocks, each representing - * a subsection of data that is small enough to be processed by a single - * worker. - */ -package org.broadinstitute.sting.gatk.datasources.shards; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SimpleDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SimpleDataSource.java deleted file mode 100644 index 6d46b3623..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SimpleDataSource.java +++ /dev/null @@ -1,37 +0,0 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; - -import org.broadinstitute.sting.gatk.datasources.shards.Shard; - -import java.io.Serializable; -import java.util.Iterator; - - -/** - * User: aaron - * Date: Mar 26, 2009 - * Time: 2:39:05 PM - *

- * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. - * - */ - -/** This class is the interface for all data sources */ -public interface SimpleDataSource extends Serializable { - - - /** - * Query the data source for a region of interest, specified by the genome location. - * The iterator will generate successive calls - * - * @param shard the region - * @return an iterator of the appropriate type, that is limited by the region - */ - public Iterator seek(Shard shard); - -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/package-info.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/package-info.java deleted file mode 100644 index e0f73e794..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/package-info.java +++ /dev/null @@ -1,6 +0,0 @@ -/** - * Retrieves core GATK data, given a shard which represents a contiguous - * subsequence of core GATK data (reads, reference, or reference-ordered - * data). 
- */ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index b1e2c9082..384f8cdf7 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -1,20 +1,17 @@ package org.broadinstitute.sting.gatk.executive; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor; -import javax.management.MBeanServer; -import javax.management.ObjectName; -import javax.management.JMException; import java.util.Queue; import java.util.LinkedList; import java.util.Collection; @@ -22,7 +19,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; -import java.lang.management.ManagementFactory; 
import net.sf.picard.reference.IndexedFastaSequenceFile; diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 1e21b6542..8f0c4f87e 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.executive; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.io.DirectOutputTracker; diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 418220b9e..eb02a591a 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -26,10 +26,10 @@ package org.broadinstitute.sting.gatk.executive; import org.apache.log4j.Logger; -import 
org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.io.OutputTracker; @@ -40,13 +40,11 @@ import org.broadinstitute.sting.gatk.ReadMetrics; import java.io.FileNotFoundException; import java.io.FileOutputStream; -import java.io.OutputStream; import java.io.PrintStream; import java.lang.management.ManagementFactory; import java.util.*; import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.threading.*; diff --git a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java index 16d750420..636b760e5 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/ShardTraverser.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.executive; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import 
org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker; import org.broadinstitute.sting.gatk.walkers.Walker; diff --git a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index 1044f1a40..1953f5581 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -1,12 +1,10 @@ package org.broadinstitute.sting.gatk.executive; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.iterators.*; import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java index 59ece0ee5..a7666981c 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RMDIntervalGenerator.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.refdata.utils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.*; diff --git 
a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 76f147b58..2be70585d 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.traversals; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java index 7362b83a4..f15a20cd3 100644 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java @@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadPairWalker; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.apache.log4j.Logger; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecordCoordinateComparator; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index d1b0c953c..5c0be33ee 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -36,7 +36,7 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 537be0b44..67ac390a3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -43,7 +43,7 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index c54eb190f..3b4ee9863 100644 --- 
a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import org.broadinstitute.sting.gatk.walkers.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index c1b0440da..0ed61fc48 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; import 
org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 37e9dc4d4..df710146a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -33,10 +33,9 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index edc3056a8..968bd7131 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RMD; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 704607d40..3c4e07ef9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -31,7 +31,7 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.commandline.Argument; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 294e31f0b..4d94f041a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index c6f6d986a..1aae82eda 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.DownsampleType; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.commandline.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index eca06f3ee..6227ad75d 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -31,6 +31,8 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import 
org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; @@ -38,13 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 5057ba2eb..b864cccee 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -33,6 +33,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import 
org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.StingException; @@ -45,7 +46,6 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java index 13110f725..142c383e3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java @@ -33,10 +33,10 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.SimpleTimer; import java.io.PrintStream; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 187b26426..3667cf062 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -32,7 +32,7 @@ import org.broad.tribble.vcf.VCFCodec; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index fa7c48a3f..90ffac8d4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -33,7 +33,7 @@ import java.util.regex.Pattern; import net.sf.samtools.*; import net.sf.samtools.util.SequenceUtil; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 3cf4cc37e..0aae62c5c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -8,7 +8,7 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.report.GATKReport; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index 5d3802439..3ed350b91 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -30,7 +30,7 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index 7b5193345..827db8979 100755 --- 
a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 3a7dfc423..da16fbb08 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.RodWalker; diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 54a869aaf..3fca6088b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DetectWGAWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DetectWGAWalker.java index 995fd764c..529b78aca 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DetectWGAWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DetectWGAWalker.java @@ -31,10 +31,8 @@ import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource; +import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.oneoffprojects.utils.ReadPair; -import org.broadinstitute.sting.oneoffprojects.utils.AlignmentInfo; -import org.broadinstitute.sting.oneoffprojects.utils.Assembly; 
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import net.sf.samtools.SAMRecord; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java index 5c09d984f..2c476442a 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java @@ -7,9 +7,8 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java index e4a4e6947..9b71c8d5c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ValidationGenotyper.java @@ -29,7 +29,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java index ae72d6b7f..ce73dfd8c 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java @@ -3,10 +3,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 07936d583..04c89e8a3 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -2,7 +2,7 @@ package 
org.broadinstitute.sting.utils.interval; import net.sf.picard.util.IntervalList; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource; +import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; diff --git a/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java index 6ad1a6b97..79b99c7d8 100755 --- a/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/vcf/VCFUtils.java @@ -25,14 +25,10 @@ package org.broadinstitute.sting.utils.vcf; -import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.apache.log4j.Logger; diff --git a/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java b/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java index e45d2395d..27b76537f 100644 --- a/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java +++ b/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java @@ -27,7 +27,7 @@ package net.sf.picard.reference; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener; +import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java index 0375e0efd..34cf8a88a 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusReferenceViewUnitTest.java @@ -1,12 +1,11 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.testng.Assert; import org.testng.annotations.Test; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard; +import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index d3c92c254..9a2f94a90 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -1,18 +1,16 @@ package org.broadinstitute.sting.gatk.datasources.providers; -import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequenceFile; import 
net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.executive.WindowMaker; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.datasources.shards.LocusShard; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.LocusShard; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 0ab804eb9..ccf4ae17b 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import 
org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/reads/MockLocusShard.java similarity index 87% rename from java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java rename to java/test/org/broadinstitute/sting/gatk/datasources/reads/MockLocusShard.java index db2aaf7b5..dc3a6cafe 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/reads/MockLocusShard.java @@ -22,11 +22,12 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -package org.broadinstitute.sting.gatk.datasources.shards; +package org.broadinstitute.sting.gatk.datasources.reads; +import org.broadinstitute.sting.gatk.datasources.reads.LocusShard; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.List; diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMBAMDataSourceUnitTest.java similarity index 95% rename from java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java rename to java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMBAMDataSourceUnitTest.java index dc7b25163..e41a6b3b7 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMBAMDataSourceUnitTest.java @@ -1,13 +1,15 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +package org.broadinstitute.sting.gatk.datasources.reads; import static org.testng.Assert.fail; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import 
org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; @@ -22,7 +24,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.List; -import java.util.Collections; /** * diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java similarity index 98% rename from java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java rename to java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java index d85c1d634..13f0b3ac1 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataPoolUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java @@ -1,9 +1,8 @@ -package org.broadinstitute.sting.gatk.datasources.simpleDataSources; +package org.broadinstitute.sting.gatk.datasources.rmd; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; @@ -19,7 +18,6 @@ import org.testng.annotations.Test; import java.io.File; import 
java.io.FileNotFoundException; -import java.util.Collections; import net.sf.picard.reference.IndexedFastaSequenceFile; /** diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index e7a1ea826..7a03b76db 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -12,7 +12,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index faa6fbdbc..446df326c 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -8,11 +8,11 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; -import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; -import 
org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource; +import org.broadinstitute.sting.gatk.datasources.reads.Shard; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy; +import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -31,7 +31,6 @@ import java.io.FileOutputStream; import java.io.PrintStream; import java.util.ArrayList; import java.util.List; -import java.util.Collections; /** * @@ -119,7 +118,7 @@ public class TraverseReadsUnitTest extends BaseTest { @Test public void testUnmappedReadCount() { SAMDataSource dataSource = new SAMDataSource(bamList,genomeLocParser); - ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, + ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref, ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ref.getSequenceDictionary(), readSize, genomeLocParser);