A few minor modifications masquerading as significant changes according to

svn's logs:
- Copied BAM indexing engine from Picard back into the GATK anticipating
  shard merging algorithm.  Tried to leave most of the building blocks in
  Picard.  If this turns into a logistical nightmare, I'll merge the building
  blocks into the GATK as well.
- Reorganized the org.broadinstitute.sting.gatk.datasources package, giving
  better separation of query and management functionality for reads, ref, rmd,
  and samples.  
- Merged Shard building blocks into org.broadinstitute.sting.gatk.datasources.
  reads package, indicating its current strong relationship with the reads,
  rather than the general unifying element I wish this would be.
- Collapsed BAMFormatAwareShard into Shard.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5184 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-02-03 17:59:19 +00:00
parent 7af003666d
commit 5c3198520c
95 changed files with 1900 additions and 814 deletions

View File

@ -25,7 +25,7 @@
package net.sf.picard.reference;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools;
import java.util.ArrayList;
import java.util.List;
/**
* A temporary solution to work around Java access rights issues:
* override BAMFileSpan and make it public.
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
*/
public class GATKBAMFileSpan extends BAMFileSpan {
    /**
     * Builds a span that starts out with no chunks.
     */
    public GATKBAMFileSpan() {
        super();
    }

    /**
     * Builds a span whose only region is the supplied chunk.
     * @param chunk The single chunk making up this span.
     */
    public GATKBAMFileSpan(final Chunk chunk) {
        super(chunk);
    }

    /**
     * Builds a span from a list of chunks.  The list is copied into a
     * fresh ArrayList before it is handed to the superclass.
     * @param chunks The chunks making up this span.
     */
    public GATKBAMFileSpan(final List<GATKChunk> chunks) {
        super(new ArrayList<Chunk>(chunks));
    }
}

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* A temporary solution to work around Java access rights issues:
* override Bin and make it public.
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
*/
public class GATKBin extends Bin {
    /**
     * Creates a bin for the given reference sequence and bin number.
     * @param referenceSequence Reference sequence passed on to the superclass.
     * @param binNumber Bin number passed on to the superclass.
     */
    public GATKBin(final int referenceSequence, final int binNumber) {
        super(referenceSequence,binNumber);
    }

    /**
     * Creates a bin with the same reference sequence and bin number as the given bin.
     * Only those two values are passed to the superclass constructor.
     * @param bin Bin whose reference sequence and bin number are reused.
     */
    public GATKBin(final Bin bin) {
        super(bin.getReferenceSequence(),bin.getBinNumber());
    }

    /**
     * Publicly accessible view of the superclass accessor.
     * @return The reference sequence of this bin.
     */
    @Override
    public int getReferenceSequence() {
        return super.getReferenceSequence();
    }

    /**
     * Publicly accessible view of the superclass accessor.
     * @return The number of this bin.
     */
    @Override
    public int getBinNumber() {
        return super.getBinNumber();
    }

    /**
     * Wraps every chunk of this bin in a GATKChunk.
     * @return A new, mutable list of GATKChunks; empty when this bin has no chunk list.
     */
    public List<GATKChunk> getGATKChunkList() {
        final List<GATKChunk> gatkChunks = new ArrayList<GATKChunk>();
        // A bin may have no chunk list at all; report that as "no chunks"
        // rather than failing with a NullPointerException in the loop below.
        if(getChunkList() == null)
            return gatkChunks;
        for(Chunk chunk: getChunkList())
            gatkChunks.add(new GATKChunk(chunk));
        return gatkChunks;
    }

    /**
     * Replaces the chunk list of this bin with a copy of the given list.
     * @param chunks The chunks to store; copied into a fresh ArrayList.
     */
    public void setGATKChunkList(List<GATKChunk> chunks) {
        super.setChunkList(new ArrayList<Chunk>(chunks));
    }
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools;
import java.util.BitSet;
/**
* A temporary solution to work around Java access rights issues:
* override BinList and make it public.
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
*/
public class GATKBinList extends BinList {
    /**
     * Creates a bin list for one reference sequence from a set of bin flags.
     * @param referenceSequence Reference sequence to which these bins are relevant.
     * @param bins The bins to include.
     */
    public GATKBinList(final int referenceSequence, final BitSet bins) {
        super(referenceSequence, bins);
    }

    /**
     * Exposes the bins held in this list by delegating to the superclass.
     * @return A bitset where a bin is present in the list if the bit is true.
     */
    public BitSet getBins() {
        return super.getBins();
    }
}

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools;
/**
* A temporary solution to work around Java access rights issues:
* override chunk and make it public.
* TODO: Eliminate once we determine the final fate of the BAM index reading code.
*/
public class GATKChunk extends Chunk {
    /**
     * Creates a chunk from a start and stop value.
     * @param start Value passed to the superclass as the chunk start.
     * @param stop Value passed to the superclass as the chunk end.
     */
    public GATKChunk(final long start, final long stop) {
        super(start, stop);
    }

    /**
     * Creates a chunk with the same start and end as the given chunk.
     * @param chunk Chunk whose start and end are reused.
     */
    public GATKChunk(final Chunk chunk) {
        super(chunk.getChunkStart(), chunk.getChunkEnd());
    }

    /**
     * @return A new GATKChunk with the same start and end as this one.
     */
    @Override
    public GATKChunk clone() {
        final long start = getChunkStart();
        final long end = getChunkEnd();
        return new GATKChunk(start, end);
    }

    /**
     * @return The start of this chunk, as reported by the superclass.
     */
    @Override
    public long getChunkStart() {
        return super.getChunkStart();
    }

    /**
     * @param value New start of this chunk, forwarded to the superclass.
     */
    @Override
    public void setChunkStart(final long value) {
        super.setChunkStart(value);
    }

    /**
     * @return The end of this chunk, as reported by the superclass.
     */
    @Override
    public long getChunkEnd() {
        return super.getChunkEnd();
    }

    /**
     * @param value New end of this chunk, forwarded to the superclass.
     */
    @Override
    public void setChunkEnd(final long value) {
        super.setChunkEnd(value);
    }
}

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;

View File

@ -36,16 +36,16 @@ import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.MonolithicShardStrategy;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
@ -66,10 +66,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**

View File

@ -5,10 +5,9 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.util.List;
import java.util.Collection;
/**
* User: hanna

View File

@ -27,9 +27,8 @@ package org.broadinstitute.sting.gatk;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.classloader.PluginManager;

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
* User: aaron
* Date: Mar 26, 2009
* Time: 9:25:49 AM
* <p/>
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
/**
* This exception is thrown when we're unable to generate a data source,
* most likely due to an incomplete input source list
*/
public class DataSourceGenerationException extends ReviewedStingException {
    /**
     * @param message Description of why the data source could not be generated.
     */
    public DataSourceGenerationException(String message) {
        super(message);
    }

    /**
     * @param message Description of why the data source could not be generated.
     * @param throwable Underlying cause, forwarded to the superclass.
     */
    public DataSourceGenerationException(String message, Throwable throwable) {
        super(message, throwable);
    }
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.ReadProperties;

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.utils.GenomeLoc;

View File

@ -25,14 +25,12 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.ArrayList;
import java.util.Collection;

View File

@ -1,9 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import java.util.Collection;

View File

@ -26,13 +26,12 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.RODMergingIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import java.util.*;

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

View File

@ -0,0 +1,195 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBin;
import net.sf.samtools.GATKChunk;
import net.sf.samtools.LinearIndex;
import java.util.*;
/**
* Represents the contents of a bam index file for one reference.
* A BAM index (.bai) file contains information for all references in the bam file.
* This class describes the data present in the index file for one of these references;
* including the bins, chunks, and linear index.
*/
class BAMIndexContent {
    /**
     * Reference sequence whose index data is held by this object.
     */
    private final int mReferenceSequence;

    /**
     * Every bin of that reference sequence.
     */
    private final BinList mBinList;

    /**
     * Linear index of that reference sequence.
     */
    private final LinearIndex mLinearIndex;

    /**
     * @param referenceSequence Content corresponds to this reference.
     * @param bins              Array of bins represented by this content, possibly sparse
     * @param numberOfBins      Number of non-null bins
     * @param linearIndex       Additional index used to optimize queries
     */
    BAMIndexContent(final int referenceSequence, final GATKBin[] bins, final int numberOfBins, final LinearIndex linearIndex) {
        this.mReferenceSequence = referenceSequence;
        this.mLinearIndex = linearIndex;
        this.mBinList = new BinList(bins, numberOfBins);
    }

    /**
     * @return the reference sequence this content belongs to
     */
    public int getReferenceSequence() {
        return mReferenceSequence;
    }

    /**
     * Checks whether a bin with the same bin number as the given bin is stored here.
     *
     * @param bin bin whose number is looked up
     * @return true if a non-null bin is stored under that number
     */
    public boolean containsBin(final GATKBin bin) {
        final GATKBin stored = mBinList.getBin(bin.getBinNumber());
        return stored != null;
    }

    /**
     * @return iterable list of bins represented by this content
     */
    public BinList getBins() {
        return mBinList;
    }

    /**
     * @return the number of non-null bins represented by this content
     */
    int getNumberOfNonNullBins() {
        return mBinList.getNumberOfNonNullBins();
    }

    /**
     * @return an unmodifiable list of the chunks of every bin that has a chunk list
     */
    public List<GATKChunk> getAllChunks() {
        final List<GATKChunk> collected = new ArrayList<GATKChunk>();
        final Iterator<GATKBin> bins = mBinList.iterator();
        while (bins.hasNext()) {
            final GATKBin current = bins.next();
            // Bins without a chunk list contribute nothing.
            if (current.getChunkList() == null)
                continue;
            collected.addAll(current.getGATKChunkList());
        }
        return Collections.unmodifiableList(collected);
    }

    /**
     * @return the linear index represented by this content
     */
    public LinearIndex getLinearIndex() {
        return mLinearIndex;
    }

    /**
     * Encapsulates the list of bins stored in the BAMIndexContent.
     * The bins are currently kept in an array indexed by bin number; the
     * representation may change to an ArrayList or another structure.
     */
    class BinList implements Iterable<GATKBin> {
        private final GATKBin[] mBinArray;
        public final int numberOfNonNullBins;
        public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based

        /**
         * @param binArray            a sparse array representation of the bins. The index into the array is the bin number.
         * @param numberOfNonNullBins number of entries of binArray that are not null, as supplied by the caller
         */
        BinList(GATKBin[] binArray, int numberOfNonNullBins) {
            this.mBinArray = binArray;
            this.numberOfNonNullBins = numberOfNonNullBins;
            this.maxBinNumber = binArray.length - 1;
        }

        /**
         * @param binNumber number of the bin to fetch
         * @return the bin stored under that number, or null when the number is
         *         beyond the end of the array or the slot is empty
         */
        GATKBin getBin(int binNumber) {
            return (binNumber > maxBinNumber) ? null : mBinArray[binNumber];
        }

        /**
         * @return the non-null bin count given at construction time
         */
        int getNumberOfNonNullBins() {
            return numberOfNonNullBins;
        }

        /**
         * Gets an iterator over all non-null bins.
         *
         * @return An iterator over all bins.
         */
        public Iterator<GATKBin> iterator() {
            return new BinIterator();
        }

        private class BinIterator implements Iterator<GATKBin> {
            /**
             * Bin number at which the search for the next non-null bin resumes.
             */
            private int cursor = 0;

            /**
             * Are there more bins in this set, waiting to be returned?
             * Advances the cursor past empty slots as a side effect.
             *
             * @return True if more bins are remaining.
             */
            public boolean hasNext() {
                for (; cursor <= maxBinNumber; cursor++) {
                    if (getBin(cursor) != null)
                        return true;
                }
                return false;
            }

            /**
             * Gets the next bin in the provided BinList.
             *
             * @return the next available bin in the BinList.
             */
            public GATKBin next() {
                if (!hasNext())
                    throw new NoSuchElementException("This BinIterator is currently empty");
                // hasNext() left the cursor on a non-null bin; hand it out and step past it.
                return getBin(cursor++);
            }

            /**
             * Removal is not supported by this iterator.
             */
            public void remove() {
                throw new UnsupportedOperationException("Unable to remove from a bin iterator");
            }
        }
    }
}

View File

@ -0,0 +1,29 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.Bin;
import java.util.HashMap;
import java.util.Map;
/**
* Models a bin at which all BAM files in the merged input stream overlap.
*/
class BAMOverlap {
    /** Start value supplied at construction time. */
    public final int start;

    /** Stop value supplied at construction time. */
    public final int stop;

    /** The bin registered for each reader. */
    private final Map<SAMReaderID,Bin> bins = new HashMap<SAMReaderID,Bin>();

    /**
     * @param start Start of the overlap.
     * @param stop Stop of the overlap.
     */
    public BAMOverlap(final int start, final int stop) {
        this.stop = stop;
        this.start = start;
    }

    /**
     * Registers the bin for a reader, replacing any bin stored for it before.
     * @param id Reader the bin belongs to.
     * @param bin Bin to store.
     */
    public void addBin(final SAMReaderID id, final Bin bin) {
        bins.put(id, bin);
    }

    /**
     * @param id Reader to look up.
     * @return The bin stored for that reader, or null if none was stored.
     */
    public Bin getBin(final SAMReaderID id) {
        final Bin stored = bins.get(id);
        return stored;
    }
}

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Represents a small section of a BAM file, and every associated interval.
*/
class FilePointer {
    /** File span registered for each reader. */
    protected final Map<SAMReaderID,SAMFileSpan> fileSpans = new HashMap<SAMReaderID,SAMFileSpan>();

    /** Name of the reference sequence this pointer refers to. */
    protected final String referenceSequence;

    /** Overlap supplied at construction time; null for single-location pointers. */
    protected final BAMOverlap overlap;

    /** Intervals associated with this pointer. */
    protected final List<GenomeLoc> locations;

    /**
     * Does this file pointer point into an unmapped region?
     */
    protected final boolean isRegionUnmapped;

    /**
     * Builds a pointer for exactly one location.  The location list is a
     * fixed singleton list, and no overlap is recorded.
     * @param location The only location of this pointer.
     */
    public FilePointer(final GenomeLoc location) {
        this.overlap = null;
        this.referenceSequence = location.getContig();
        this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
        this.locations = Collections.singletonList(location);
    }

    /**
     * Builds a pointer for an overlap on the given reference sequence.  The
     * location list starts empty and the region is flagged as mapped.
     * @param referenceSequence Name of the reference sequence.
     * @param overlap Overlap this pointer describes.
     */
    public FilePointer(final String referenceSequence,final BAMOverlap overlap) {
        this.isRegionUnmapped = false;
        this.locations = new ArrayList<GenomeLoc>();
        this.overlap = overlap;
        this.referenceSequence = referenceSequence;
    }

    /**
     * Appends a location to this pointer.
     * @param location Location to append.
     */
    public void addLocation(GenomeLoc location) {
        this.locations.add(location);
    }

    /**
     * Registers the file span for a reader, replacing any span stored for it before.
     * @param id Reader the span belongs to.
     * @param fileSpan Span to store.
     */
    public void addFileSpans(SAMReaderID id, SAMFileSpan fileSpan) {
        fileSpans.put(id, fileSpan);
    }
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
/**
* Constants used in reading & writing BAM files
*/
class GATKBAMFileConstants {
    /**
     * The beginning of a BAMRecord is a fixed-size block of 8 int32s
     */
    static final int FIXED_BLOCK_SIZE = 8 * 4;

    /**
     * Sanity check -- we never expect BAMRecords to be as big as this.
     */
    static final int MAXIMUM_RECORD_LENGTH = 1024 * 1024;

    /**
     * BAM file magic number.  This is what is present in the gunzipped version of the file,
     * which never exists on disk.
     * Spelled out as bytes ('B','A','M',0x01) so the value does not depend on the
     * platform default charset the way String.getBytes() does.
     */
    static final byte[] BAM_MAGIC = { 'B', 'A', 'M', 1 };

    /**
     * BAM index file magic number.
     * Spelled out as bytes ('B','A','I',0x01) so the value does not depend on the
     * platform default charset the way String.getBytes() does.
     */
    static final byte[] BAM_INDEX_MAGIC = { 'B', 'A', 'I', 1 };
}

View File

@ -0,0 +1,575 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.BAMIndex;
import net.sf.samtools.BAMIndexMetaData;
import net.sf.samtools.Bin;
import net.sf.samtools.BrowseableBAMIndex;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKBin;
import net.sf.samtools.GATKBinList;
import net.sf.samtools.GATKChunk;
import net.sf.samtools.LinearIndex;
import net.sf.samtools.SAMException;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.RuntimeIOException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.*;
/**
* A basic interface for querying BAM indices.
*
* @author mhanna
* @version 0.1
*/
public class GATKBAMIndex implements BAMIndex, BrowseableBAMIndex {
/**
* Reports the total amount of genomic data that any bin can index.
*/
protected static final int BIN_GENOMIC_SPAN = 512*1024*1024;
/**
* What is the starting bin for each level?
*/
private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
/**
* Reports the maximum number of bins that can appear in a BAM file.
*/
public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
/**
* MAX_BINS + 1 minus the first bin number of the last level in LEVEL_STARTS.
*/
public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1];
/**
* The index file handed to the constructor.
*/
private final File mFile;
/**
* Read-only, little-endian memory mapping of the whole of mFile, set up in the constructor.
*/
private final MappedByteBuffer mFileBuffer;
/**
* The sequence dictionary handed to the constructor.
*/
private SAMSequenceDictionary mBamDictionary = null;
/**
* NOTE(review): starts out null; presumably the reference index of the most recent query -- confirm against the query code.
*/
private Integer mLastReferenceRetrieved = null;
/**
* NOTE(review): presumably caches index content per reference index; entries are weakly keyed -- confirm against the query code.
*/
private WeakHashMap<Integer,BAMIndexContent> mQueriesByReference = new WeakHashMap<Integer,BAMIndexContent>();
/**
 * Opens a BAM index file, maps it into memory and verifies its magic number.
 * @param file The BAM index file to read.
 * @param dictionary Sequence dictionary stored alongside the index.
 * @throws RuntimeIOException if the file cannot be opened or mapped.
 * @throws RuntimeException if the file does not start with the BAM index magic number.
 */
public GATKBAMIndex(final File file, final SAMSequenceDictionary dictionary) {
    mFile = file;
    mBamDictionary = dictionary;
    // Map the whole file read-only.  The mapping stays valid after the channel is
    // closed, so the stream (and with it the channel) is released straight away.
    try {
        final FileInputStream fileStream = new FileInputStream(mFile);
        try {
            final FileChannel fileChannel = fileStream.getChannel();
            mFileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size());
            mFileBuffer.order(ByteOrder.LITTLE_ENDIAN);
        }
        finally {
            // Runs even when size() or map() fails, so the file handle is never leaked.
            // Closing the stream also closes the channel obtained from it.
            fileStream.close();
        }
    }
    catch (IOException exc) {
        throw new RuntimeIOException(exc.getMessage(), exc);
    }
    // Verify the magic number.
    seek(0);
    final byte[] buffer = new byte[4];
    readBytes(buffer);
    if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
        throw new RuntimeException("Invalid file header in BAM index " + mFile +
                ": " + new String(buffer));
    }
}
/**
 * Get the number of levels employed by this index.
 * The value is the number of entries in LEVEL_STARTS, one entry per level.
 * @return Number of levels in this index.
 */
public static int getNumIndexLevels() {
return LEVEL_STARTS.length;
}
/**
 * Gets the first bin in the given level.
 * @param levelNumber Level number.  0-based; used directly as an index into LEVEL_STARTS, so it
 *                    must be smaller than {@link #getNumIndexLevels()}.
 * @return The number of the first bin in this level.
 */
public static int getFirstBinInLevel(final int levelNumber) {
return LEVEL_STARTS[levelNumber];
}
/**
 * Gets the number of bins in the given level.
 * @param levelNumber Level number.  0-based; must be smaller than {@link #getNumIndexLevels()}.
 * @return The size (number of possible bins) of the given level.
 */
public int getLevelSize(final int levelNumber) {
    // The deepest level has no successor entry in LEVEL_STARTS, so it is bounded by the end of
    // the regular bin range instead.  This is the same bound getFirstLocusInBin and
    // getLastLocusInBin use.  The previous test compared against getNumIndexLevels(), which made
    // every request for the deepest level index past the end of LEVEL_STARTS.
    if(levelNumber == getNumIndexLevels()-1)
        return MAX_BINS-1-LEVEL_STARTS[levelNumber];
    else
        return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
}
/**
 * Determines which level of the index a bin belongs to.
 * @param bin The bin for which to determine the level.
 * @return The 0-based level whose range of bin numbers contains the given bin.
 * @throws SAMException if the bin number is MAX_BINS or greater, or no level matches.
 */
@Override
public int getLevelForBin(final Bin bin) {
    final int binNumber = new GATKBin(bin).getBinNumber();
    if(binNumber >= MAX_BINS)
        throw new SAMException("Tried to get level for invalid bin.");

    // Walk from the deepest level upward; the first level starting at or below the bin owns it.
    int level = getNumIndexLevels();
    while(--level >= 0) {
        if(LEVEL_STARTS[level] <= binNumber)
            return level;
    }
    throw new SAMException("Unable to find correct bin for bin "+bin);
}
/**
 * Gets the first locus that this bin can index into.
 * @param bin The bin to test.
 * @return The first (1-based) position that the given bin can represent.
 */
public int getFirstLocusInBin(final Bin bin) {
    final int level = getLevelForBin(bin);
    final int firstBinInLevel = LEVEL_STARTS[level];
    // The deepest level has no successor in LEVEL_STARTS; it ends at MAX_BINS-1.
    final int endOfLevel = (level == getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1];
    final int binsInLevel = endOfLevel - firstBinInLevel;
    final int offsetWithinLevel = new GATKBin(bin).getBinNumber() - firstBinInLevel;
    final int lociPerBin = BIN_GENOMIC_SPAN / binsInLevel;
    return offsetWithinLevel*lociPerBin + 1;
}
/**
 * Gets the last locus that this bin can index into.
 * @param bin The bin to test.
 * @return The last (1-based) position that the given bin can represent.
 */
@Override
public int getLastLocusInBin(final Bin bin) {
    final int level = getLevelForBin(bin);
    final int firstBinInLevel = LEVEL_STARTS[level];
    // The deepest level has no successor in LEVEL_STARTS; it ends at MAX_BINS-1.
    final int endOfLevel = (level == getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1];
    final int binsInLevel = endOfLevel - firstBinInLevel;
    final int binsThroughThisOne = new GATKBin(bin).getBinNumber() - firstBinInLevel + 1;
    final int lociPerBin = BIN_GENOMIC_SPAN / binsInLevel;
    return binsThroughThisOne*lociPerBin;
}
/**
 * Reads the number of reference sequences recorded in the index.
 * Repositions the shared file buffer as a side effect.
 * @return The integer stored at byte offset 4, directly after the 4-byte magic number.
 */
public int getNumberOfReferences() {
seek(4);
return readInteger();
}
/**
 * Use to get close to the unmapped reads at the end of a BAM file.
 * Scans every reference in the index and remembers the final linear index entry of the last
 * reference that has any linear index entries.
 * @return The file offset of the first record in the last linear bin, or -1
 * if there are no elements in linear bins (i.e. no mapped reads).
 */
public long getStartOfLastLinearBin() {
    seek(4);
    final int referenceCount = readInteger();

    long lastPointer = -1;
    for (int reference = 0; reference < referenceCount; reference++) {
        // Step over each bin record: bin number, chunk count, then 16 bytes per chunk.
        int binsRemaining = readInteger();
        while (binsRemaining-- > 0) {
            skipBytes(4);
            final int chunkCount = readInteger();
            skipBytes(16 * chunkCount);
        }

        // Reads may not align to the trailing references, so only overwrite the result
        // when this reference actually has linear index entries.
        final int linearEntries = readInteger();
        if (linearEntries <= 0)
            continue;
        skipBytes(8 * (linearEntries - 1));
        lastPointer = readLong();
    }
    return lastPointer;
}
/**
 * Metadata retrieval is not supported by this index implementation; this method always throws.
 * @param reference the reference of interest (ignored)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
public BAMIndexMetaData getMetaData(int reference) {
throw new UnsupportedOperationException("Cannot retrieve metadata for GATKBAMIndex");
}
/**
 * Returns the count of records unassociated with any reference.
 * The count is read from the position immediately following the last reference's index data.
 *
 * @return the value stored at the end of the bam index that counts records holding no coordinates,
 * or null if reading it fails (e.g. an old index format without that trailing value)
 */
public Long getNoCoordinateCount() {
    seek(4);
    // Skipping past every reference leaves the buffer positioned at the trailing count, if any.
    skipToSequence(readInteger());
    try {
        return readLong();
    } catch (Exception e) {
        // in case of old index file without meta data
        return null;
    }
}
/**
 * Get list of regions of BAM file that may contain SAMRecords for the given range
 * @param referenceIndex sequence of desired SAMRecords
 * @param startPos 1-based start of the desired interval, inclusive
 * @param endPos 1-based end of the desired interval, inclusive
 * @return the file span covering the chunks that can be scanned to find SAMRecords overlapping the
 * given positions, or null if the reference has no index data, the interval is inverted, or no
 * chunk overlaps the interval.
 */
@Override
public GATKBAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) {
    final BAMIndexContent queryResults = getQueryResults(referenceIndex);
    if(queryResults == null)
        return null;

    // getBinsOverlapping returns null when the start lies beyond the end; nothing can overlap
    // such an interval, so report "no span" instead of dereferencing the null bin list.
    final GATKBinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos);
    if(overlappingBins == null)
        return null;

    // Collect private copies of the chunks from every indexed bin that overlaps the interval.
    // Copies are used because optimizeChunkList may modify the chunks it is handed.
    List<GATKChunk> chunkList = new ArrayList<GATKChunk>();
    for(GATKBin bin: queryResults.getBins()) {
        if (!overlappingBins.getBins().get(bin.getBinNumber()))
            continue;
        for(GATKChunk chunk: bin.getGATKChunkList())
            chunkList.add(chunk.clone());
    }
    if (chunkList.isEmpty()) {
        return null;
    }

    chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
    return new GATKBAMFileSpan(chunkList);
}
/**
 * Perform an overlapping query of all bins bounding the given location: the bin itself (when the
 * index contains it) plus the bin at each shallower level that covers the bin's first locus.
 * @param bin The bin over which to perform an overlapping query.
 * @return The file pointers, or null if the bin is null or its reference has no index data.
 */
@Override
public GATKBAMFileSpan getSpanOverlapping(final Bin bin) {
    if(bin == null)
        return null;

    final GATKBin targetBin = new GATKBin(bin);
    final BAMIndexContent indexContent = getQueryResults(targetBin.getReferenceSequence());
    if(indexContent == null)
        return null;

    final int targetLevel = getLevelForBin(bin);
    final int firstLocus = getFirstLocusInBin(bin);

    // Start with the requested bin, if the index actually holds it.
    final List<GATKBin> coveringBins = new ArrayList<GATKBin>();
    if(indexContent.containsBin(targetBin))
        coveringBins.add(indexContent.getBins().getBin(targetBin.getBinNumber()));

    // Then climb towards the root, adding each ancestor bin present in the index.
    for(int level = targetLevel-1; level >= 0; level--) {
        final int levelStart = getFirstBinInLevel(level);
        final int lociPerBin = getMaxAddressibleGenomicLocation()/getLevelSize(level);
        final GATKBin ancestor = indexContent.getBins().getBin(firstLocus/lociPerBin + levelStart);
        if(ancestor != null && indexContent.containsBin(ancestor))
            coveringBins.add(ancestor);
    }

    // Work on copies of the chunks, since optimizeChunkList may modify what it is given.
    final List<GATKChunk> chunks = new ArrayList<GATKChunk>();
    for(GATKBin coveringBin: coveringBins) {
        for(GATKChunk chunk: coveringBin.getGATKChunkList())
            chunks.add(chunk.clone());
    }

    final long minimumOffset = indexContent.getLinearIndex().getMinimumOffset(firstLocus);
    return new GATKBAMFileSpan(optimizeChunkList(chunks,minimumOffset));
}
/**
 * Get a list of bins in the BAM file that may contain SAMRecords for the given range.
 * @param referenceIndex sequence of desired SAMRecords
 * @param startPos 1-based start of the desired interval, inclusive
 * @param endPos 1-based end of the desired interval, inclusive
 * @return a list of bins that contain relevant data, or null if regionToBins rejects the interval.
 */
public GATKBinList getBinsOverlapping(final int referenceIndex, final int startPos, final int endPos) {
    final BitSet candidateBins = regionToBins(startPos,endPos);
    return (candidateBins == null) ? null : new GATKBinList(referenceIndex,candidateBins);
}
/**
 * Reads the bins and linear index entries for one reference sequence out of the mapped index file.
 * The file buffer is read sequentially, so the order of the reads below matters.
 * @param referenceSequence Index of the reference whose data should be loaded.
 * @param startPos 1-based start of the region of interest; passed to regionToBins and used to
 *                 choose the first linear index entry to keep.
 * @param endPos 1-based end of the region of interest; a value of zero or less keeps linear index
 *               entries through the end of the reference's linear index.
 * @return The loaded index content, or null if the reference index is not smaller than the number
 *         of references in the file or regionToBins rejects the region.
 */
protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) {
seek(4);
// NOTE(review): metaDataChunks is filled below but never read afterwards in this method.
List<GATKChunk> metaDataChunks = new ArrayList<GATKChunk>();
final int sequenceCount = readInteger();
if (referenceSequence >= sequenceCount) {
return null;
}
final BitSet regionBins = regionToBins(startPos, endPos);
if (regionBins == null) {
return null;
}
// Position the buffer at the start of the requested reference's bin list.
skipToSequence(referenceSequence);
int binCount = readInteger();
boolean metaDataSeen = false;
// Bins are stored in an array indexed by bin number.
GATKBin[] bins = new GATKBin[getMaxBinNumberForReference(referenceSequence) +1];
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
// System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks);
GATKChunk lastChunk = null;
if (regionBins.get(indexBin)) {
// Bin overlaps the region: load all of its chunks.
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = readLong();
final long chunkEnd = readLong();
lastChunk = new GATKChunk(chunkBegin, chunkEnd);
chunks.add(lastChunk);
}
} else if (indexBin == MAX_BINS) {
// Bin number MAX_BINS marks meta data rather than a real bin.  Its chunks are read (which
// also advances the buffer past them) but no bin is created, and the entry is subtracted
// from the bin count reported in the result.
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = readLong();
final long chunkEnd = readLong();
lastChunk = new GATKChunk(chunkBegin, chunkEnd);
metaDataChunks.add(lastChunk);
}
metaDataSeen = true;
continue; // don't create a Bin
} else {
// Bin lies outside the region: skip its chunks (16 bytes each) and record it without chunks.
skipBytes(16 * nChunks);
}
GATKBin bin = new GATKBin(referenceSequence, indexBin);
bin.setGATKChunkList(chunks);
bin.setLastChunk(lastChunk);
bins[indexBin] = bin;
}
// The linear index follows the bins: an entry count, then one 8-byte offset per entry.
final int nLinearBins = readInteger();
final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos);
final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1;
// Never read past the last linear index entry actually present in the file.
final int actualStop = Math.min(regionLinearBinStop, nLinearBins -1);
long[] linearIndexEntries = new long[0];
if (regionLinearBinStart < nLinearBins) {
linearIndexEntries = new long[actualStop-regionLinearBinStart+1];
// Skip the entries that precede the region, then read the entries covering it.
skipBytes(8 * regionLinearBinStart);
for(int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++)
linearIndexEntries[linearBin-regionLinearBinStart] = readLong();
}
final LinearIndex linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries);
return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), linearIndex);
}
/**
 * The maximum bin number for a reference sequence of a given length: the first bin of the
 * deepest level plus the sequence length divided by 2^14.
 * @param sequenceLength Length of the reference sequence.
 * @return The largest bin number a sequence of that length can use.
 */
static int getMaxBinNumberForSequenceLength(int sequenceLength) {
    final int deepestLevel = getNumIndexLevels() - 1;
    final int binOffsetInLevel = sequenceLength >> 14;
    return getFirstBinInLevel(deepestLevel) + binOffsetInLevel;
}
/**
 * Looks up the cached index data for a reference if it is still in the cache.  Otherwise,
 * loads the data from the index file and caches it.
 * @param referenceIndex The reference to load.  Index data is cached for entire references.
 * @return The index information for this reference, or null if the index file has none.
 */
protected BAMIndexContent getQueryResults(final int referenceIndex) {
    // WeakHashMap looks keys up via equals() but expires an entry once its key *object* is no
    // longer strongly reachable.  The most recently used entry is therefore only kept alive if
    // mLastReferenceRetrieved is the very same Integer instance that is stored as the map key.

    // If this query is for the same reference index as the last query, return it.  Should the
    // entry have expired anyway, fall through and reload instead of reporting "no data".
    if(mLastReferenceRetrieved != null && mLastReferenceRetrieved.intValue() == referenceIndex) {
        final BAMIndexContent lastResults = mQueriesByReference.get(mLastReferenceRetrieved);
        if(lastResults != null)
            return lastResults;
    }

    // Box exactly once so that the map key and the strong reference share one object.
    final Integer cacheKey = Integer.valueOf(referenceIndex);

    // Check the cache.  The entry is removed first because put() on an existing mapping keeps
    // the old key object, which nothing would be holding on to.
    BAMIndexContent queryResults = mQueriesByReference.remove(cacheKey);

    // If not in the cache, attempt to load it from disk.
    if(queryResults == null)
        queryResults = query(referenceIndex,1,-1);

    // Not even available on disk.
    if(queryResults == null)
        return null;

    mQueriesByReference.put(cacheKey,queryResults);
    mLastReferenceRetrieved = cacheKey;
    return queryResults;
}
/**
 * Gets the size of the genomic range that this indexing scheme can address on a single contig.
 * @return BIN_GENOMIC_SPAN, the total amount of genomic data that any bin can index.
 */
protected int getMaxAddressibleGenomicLocation() {
return BIN_GENOMIC_SPAN;
}
/**
 * Get candidate bins for the specified region
 * @param startPos 1-based start of target region, inclusive.  Zero or less means the first position.
 * @param endPos 1-based end of target region, inclusive.  Zero or less means the last addressable position.
 * @return bit set for each bin that may contain SAMRecords in the target region, or null if the
 * start lies beyond the end.
 */
protected BitSet regionToBins(final int startPos, final int endPos) {
    final int maxPos = 0x1FFFFFFF;
    // Convert to 0-based coordinates, clamped to the addressable range.
    final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos;
    final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos;
    if (start > end) {
        return null;
    }

    final BitSet bitSet = new BitSet(MAX_BINS);
    // The single top-level bin overlaps every region.
    bitSet.set(0);

    // First bin number of each deeper level; the shift that maps a position onto a bin of that
    // level starts at 26 and shrinks by 3 per level.
    final int[] levelOffsets = {1, 9, 73, 585, 4681};
    int shift = 26;
    for (final int levelOffset : levelOffsets) {
        final int firstBin = levelOffset + (start>>shift);
        final int lastBin = levelOffset + (end>>shift);
        bitSet.set(firstBin, lastBin+1);
        shift -= 3;
    }
    return bitSet;
}
/**
 * Sorts the given chunks, drops those ending at or before the minimum offset, and coalesces
 * chunks that overlap or are adjacent.
 * Note that the input list is sorted in place and that a chunk which absorbs a neighbour has
 * its end updated, so callers should pass chunks they own.
 * @param chunks Chunks to optimize.
 * @param minimumOffset Chunks whose end is not beyond this offset are discarded.
 * @return A new list holding the surviving, coalesced chunks in sorted order.
 */
protected List<GATKChunk> optimizeChunkList(final List<GATKChunk> chunks, final long minimumOffset) {
    Collections.sort(chunks);

    final List<GATKChunk> merged = new ArrayList<GATKChunk>();
    GATKChunk tail = null;
    for (final GATKChunk candidate : chunks) {
        // linear index optimization
        if (candidate.getChunkEnd() <= minimumOffset)
            continue;

        final boolean startsNewRun = merged.isEmpty()
                || !(tail.overlaps(candidate) || tail.isAdjacentTo(candidate));
        if (startsNewRun) {
            merged.add(candidate);
            tail = candidate;
        }
        else if (candidate.getChunkEnd() > tail.getChunkEnd()) {
            // Coalesce chunks that are in adjacent file blocks.
            // This is a performance optimization.
            tail.setChunkEnd(candidate.getChunkEnd());
        }
    }
    return merged;
}
/**
 * The maximum possible bin number for this reference sequence, derived from the length the
 * sequence dictionary records for it.
 * @param reference Index of the reference sequence in the dictionary.
 * @return The maximum bin number for the sequence's length, or MAX_BINS if the length cannot be
 * looked up for any reason.
 */
private int getMaxBinNumberForReference(final int reference) {
    int maxBinNumber;
    try {
        maxBinNumber = getMaxBinNumberForSequenceLength(mBamDictionary.getSequence(reference).getSequenceLength());
    } catch (Exception e) {
        // No usable dictionary entry; fall back to the largest bin number any reference can have.
        maxBinNumber = MAX_BINS;
    }
    return maxBinNumber;
}
/**
 * Advances the file buffer past the index data of the given number of reference sequences.
 * Callers in this class position the buffer just after the reference count before calling.
 * @param sequenceIndex Number of references to skip; afterwards the buffer is positioned at the
 *                      start of the data for the reference with this index.
 */
private void skipToSequence(final int sequenceIndex) {
    int sequencesLeft = sequenceIndex;
    while (sequencesLeft-- > 0) {
        // Bins: each is a bin number, a chunk count, then 16 bytes per chunk.
        int binsLeft = readInteger();
        while (binsLeft-- > 0) {
            readInteger(); // bin number, not needed here
            final int chunkCount = readInteger();
            skipBytes(16 * chunkCount);
        }
        // Linear index: an entry count, then 8 bytes per entry.
        final int linearEntries = readInteger();
        skipBytes(8 * linearEntries);
    }
}
/**
 * Fills the given array with the next bytes from the mapped file, advancing the buffer position.
 * @param bytes Destination array; exactly bytes.length bytes are read.
 */
private void readBytes(final byte[] bytes) {
mFileBuffer.get(bytes);
}
/**
 * Reads the next 4-byte integer from the mapped file, advancing the buffer position.
 * @return The integer at the current position.
 */
private int readInteger() {
return mFileBuffer.getInt();
}
/**
 * Reads the next 8-byte long from the mapped file, advancing the buffer position.
 * @return The long at the current position.
 */
private long readLong() {
return mFileBuffer.getLong();
}
/**
 * Moves the buffer position forward relative to where it currently is.
 * @param count Number of bytes to skip.
 */
private void skipBytes(final int count) {
mFileBuffer.position(mFileBuffer.position() + count);
}
/**
 * Moves the buffer position to an absolute offset within the mapped file.
 * @param position Byte offset from the start of the file.
 */
private void seek(final int position) {
mFileBuffer.position(position);
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The Broad Institute
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -12,31 +12,40 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.apache.log4j.Logger;
import java.util.*;
import net.sf.samtools.*;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.AbstractBAMFileIndex;
import net.sf.samtools.Bin;
import net.sf.samtools.BrowseableBAMIndex;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.PriorityQueue;
import java.util.Queue;
/**
* Shard intervals based on position within the BAM file.
*
@ -47,7 +56,7 @@ public class IntervalSharder {
private static Logger logger = Logger.getLogger(IntervalSharder.class);
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
return new FilePointerIterator(dataSource,loci);
return new IntervalSharder.FilePointerIterator(dataSource,loci);
}
/**
@ -117,7 +126,7 @@ public class IntervalSharder {
for(SAMReaderID id: dataSource.getReaderIDs())
filePointer.addFileSpans(id,null);
return Collections.singletonList(filePointer);
}
}
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
List<FilePointer> filePointers = new ArrayList<FilePointer>();
@ -125,7 +134,7 @@ public class IntervalSharder {
BAMOverlap lastBAMOverlap = null;
Map<SAMReaderID,BrowseableBAMIndex> readerToIndexMap = new HashMap<SAMReaderID,BrowseableBAMIndex>();
BinMergingIterator binMerger = new BinMergingIterator();
IntervalSharder.BinMergingIterator binMerger = new IntervalSharder.BinMergingIterator();
for(SAMReaderID id: dataSource.getReaderIDs()) {
final SAMSequenceRecord referenceSequence = dataSource.getHeader(id).getSequence(contig);
// If this contig can't be found in the reference, skip over it.
@ -153,7 +162,7 @@ public class IntervalSharder {
final int locationStop = (int)location.getStop();
// Advance to first bin.
while(binIterator.peek().stop < locationStart)
while(binIterator.peek().stop < locationStart)
binIterator.next();
// Add all relevant bins to a list. If the given bin extends beyond the end of the current interval, make
@ -230,7 +239,7 @@ public class IntervalSharder {
for(FilePointer filePointer: filePointers)
filePointer.addFileSpans(id,index.getSpanOverlapping(filePointer.overlap.getBin(id)));
}
return filePointers;
}
@ -239,7 +248,7 @@ public class IntervalSharder {
private Queue<BAMOverlap> pendingOverlaps = new LinkedList<BAMOverlap>();
public void addReader(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, Iterator<Bin> bins) {
binQueue.add(new BinQueueState(id,index,referenceSequence,new LowestLevelBinFilteringIterator(index,bins)));
binQueue.add(new BinQueueState(id,index,referenceSequence,new IntervalSharder.LowestLevelBinFilteringIterator(index,bins)));
}
public boolean hasNext() {
@ -369,89 +378,7 @@ public class IntervalSharder {
}
}
/**
* Represents a small section of a BAM file, and every associated interval.
*/
class FilePointer {
protected final Map<SAMReaderID,SAMFileSpan> fileSpans = new HashMap<SAMReaderID,SAMFileSpan>();
protected final String referenceSequence;
protected final BAMOverlap overlap;
protected final List<GenomeLoc> locations;
/**
* Does this file pointer point into an unmapped region?
*/
protected final boolean isRegionUnmapped;
public FilePointer(final GenomeLoc location) {
this.referenceSequence = location.getContig();
this.overlap = null;
this.locations = Collections.singletonList(location);
this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
}
public FilePointer(final String referenceSequence,final BAMOverlap overlap) {
this.referenceSequence = referenceSequence;
this.overlap = overlap;
this.locations = new ArrayList<GenomeLoc>();
this.isRegionUnmapped = false;
}
public void addLocation(GenomeLoc location) {
locations.add(location);
}
public void addFileSpans(SAMReaderID id, SAMFileSpan fileSpan) {
this.fileSpans.put(id,fileSpan);
}
}
/**
* Models a bin at which all BAM files in the merged input stream overlap.
*/
class BAMOverlap {
public final int start;
public final int stop;
private final Map<SAMReaderID,Bin> bins = new HashMap<SAMReaderID,Bin>();
public BAMOverlap(final int start, final int stop) {
this.start = start;
this.stop = stop;
}
public void addBin(final SAMReaderID id, final Bin bin) {
bins.put(id,bin);
}
public Bin getBin(final SAMReaderID id) {
return bins.get(id);
}
}
class ReaderBin {
public final SAMReaderID id;
public final BrowseableBAMIndex index;
public final int referenceSequence;
public final Bin bin;
public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) {
this.id = id;
this.index = index;
this.referenceSequence = referenceSequence;
this.bin = bin;
}
public int getStart() {
return index.getFirstLocusInBin(bin);
}
public int getStop() {
return index.getLastLocusInBin(bin);
}
}
class BinQueueState implements Comparable<BinQueueState> {
class BinQueueState implements Comparable<org.broadinstitute.sting.gatk.datasources.reads.BinQueueState> {
private final SAMReaderID id;
private final BrowseableBAMIndex index;
private final int referenceSequence;
@ -494,7 +421,7 @@ class BinQueueState implements Comparable<BinQueueState> {
return nextBin;
}
public int compareTo(BinQueueState other) {
public int compareTo(org.broadinstitute.sting.gatk.datasources.reads.BinQueueState other) {
if(!this.bins.hasNext() && !other.bins.hasNext()) return 0;
if(!this.bins.hasNext()) return -1;
if(!this.bins.hasNext()) return 1;
@ -522,8 +449,4 @@ class BinQueueState implements Comparable<BinQueueState> {
lastLocusInCurrentBin = index.getLastLocusInBin(bin);
}
}
}
}

View File

@ -1,20 +1,13 @@
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.List;
import java.util.Map;
import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.SamRecordFilter;
/**
* Handles locus shards of BAM information.
@ -22,7 +15,7 @@ import net.sf.picard.filter.SamRecordFilter;
* @version 1.0
* @date Apr 7, 2009
*/
public class LocusShard extends BAMFormatAwareShard {
public class LocusShard extends Shard {
/**
* Create a new locus shard, divided by index.
* @param intervals List of intervals to process.

View File

@ -22,17 +22,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.ReadProperties;
import java.util.*;
import java.io.File;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;

View File

@ -1,11 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import java.util.List;
@ -14,7 +11,7 @@ import java.util.List;
* @author mhanna
* @version 0.1
*/
public class MonolithicShard extends BAMFormatAwareShard {
public class MonolithicShard extends Shard {
/**
* Creates a new monolithic shard of the given type.
* @param shardType Type of the shard. Must be either read or locus; cannot be intervalic.

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Iterator;

View File

@ -1,18 +1,13 @@
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import java.util.*;
import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMRecord;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
@ -37,7 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
* @author mhanna
* @version 0.1
*/
public class ReadShard extends BAMFormatAwareShard {
public class ReadShard extends Shard {
/**
* The reads making up this shard.
*/

View File

@ -22,15 +22,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileSpan;
import net.sf.picard.filter.SamRecordFilter;
import java.util.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -159,7 +156,7 @@ public class ReadShardStrategy implements ShardStrategy {
}
if(selectedReaders.size() > 0) {
BAMFormatAwareShard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
Shard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
dataSource.fillShard(shard);
if(!shard.isBufferEmpty()) {
@ -175,7 +172,7 @@ public class ReadShardStrategy implements ShardStrategy {
else {
// todo -- this nulling of intervals is a bit annoying since readwalkers without
// todo -- any -L values need to be special cased throughout the code.
BAMFormatAwareShard shard = new ReadShard(parser,dataSource,position,null,false);
Shard shard = new ReadShard(parser,dataSource,position,null,false);
dataSource.fillShard(shard);
nextShard = !shard.isBufferEmpty() ? shard : null;
}

View File

@ -0,0 +1,33 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.Bin;
import net.sf.samtools.BrowseableBAMIndex;
/**
 * Immutable grouping of a single bin with the reader it came from, the index used to interpret
 * it, and the reference sequence it belongs to.
 *
 * @author mhanna
 */
class ReaderBin {
public final SAMReaderID id;
public final BrowseableBAMIndex index;
public final int referenceSequence;
public final Bin bin;
/**
 * Stores the given values without further processing.
 * @param id Reader associated with the bin.
 * @param index Index used to translate the bin into loci.
 * @param referenceSequence Reference sequence associated with the bin.
 * @param bin The bin itself.
 */
public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) {
this.id = id;
this.index = index;
this.referenceSequence = referenceSequence;
this.bin = bin;
}
/**
 * @return The first locus of the bin, as reported by the index.
 */
public int getStart() {
return index.getFirstLocusInBin(bin);
}
/**
 * @return The last locus of the bin, as reported by the index.
 */
public int getStop() {
return index.getLastLocusInBin(bin);
}
}

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
@ -33,10 +33,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.ReadMetrics;
@ -59,7 +55,7 @@ import java.util.*;
* <p/>
* Converts shards to SAM iterators over the specified region
*/
public class SAMDataSource implements SimpleDataSource {
public class SAMDataSource {
/** Backing support for reads. */
protected final ReadProperties readProperties;
@ -423,7 +419,7 @@ public class SAMDataSource implements SimpleDataSource {
* @param shard Shard to fill.
* @return true if at the end of the stream. False otherwise.
*/
public void fillShard(BAMFormatAwareShard shard) {
public void fillShard(Shard shard) {
if(!shard.buffersReads())
throw new ReviewedStingException("Attempting to fill a non-buffering shard.");
@ -456,16 +452,12 @@ public class SAMDataSource implements SimpleDataSource {
if(shard instanceof MonolithicShard)
return seekMonolithic(shard);
if(!(shard instanceof BAMFormatAwareShard))
throw new ReviewedStingException("BlockDrivenSAMDataSource cannot operate on shards of type: " + shard.getClass());
BAMFormatAwareShard bamAwareShard = (BAMFormatAwareShard)shard;
if(bamAwareShard.buffersReads()) {
return bamAwareShard.iterator();
if(shard.buffersReads()) {
return shard.iterator();
}
else {
SAMReaders readers = resourcePool.getAvailableReaders();
return getIterator(readers,bamAwareShard,shard instanceof ReadShard);
return getIterator(readers,shard,shard instanceof ReadShard);
}
}
@ -500,7 +492,7 @@ public class SAMDataSource implements SimpleDataSource {
* TODO: Collapse this flag when the two sharding systems are merged.
* @return An iterator over the selected data.
*/
private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) {
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
// Set up merging to dynamically merge together multiple BAMs.
@ -566,7 +558,7 @@ public class SAMDataSource implements SimpleDataSource {
* @param id The id of the given reader.
* @param read The read to add to the shard.
*/
private void addReadToBufferingShard(BAMFormatAwareShard shard,SAMReaderID id,SAMRecord read) {
private void addReadToBufferingShard(Shard shard,SAMReaderID id,SAMRecord read) {
SAMFileSpan endChunk = read.getFileSource().getFilePointer().getContentsFollowing();
shard.addRead(read);
readerPositions.put(id,endChunk);

View File

@ -1,10 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.commandline.Tags;
import java.io.File;
import java.util.List;
import java.util.Collections;
/**
* Uniquely identifies a SAM file reader.

View File

@ -0,0 +1,215 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
*
* User: aaron
* Date: Apr 10, 2009
* Time: 5:00:27 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 10, 2009
* <p/>
* Class Shard
* <p/>
* The base abstract class for shards.
*/
public abstract class Shard implements HasGenomeLocation {
    /**
     * The kinds of shard that exist: read-based or locus-based.
     */
    public enum ShardType {
        READ, LOCUS
    }

    /**
     * Message carried by every buffering operation that this base class does not support.
     */
    private static final String NOT_BUFFERING_MESSAGE = "This shard does not buffer reads.";

    /**
     * Parser used to build the spanning location in {@link #getLocation()}.
     */
    protected final GenomeLocParser parser; // incredibly annoying!

    /**
     * What type of shard is this?  Read or locus?
     */
    protected final ShardType shardType;

    /**
     * Locations covered by this shard.  May be null (see {@link #getGenomeLocs()}).
     */
    protected final List<GenomeLoc> locs;

    /**
     * Statistics about which reads in this shard were used and which were filtered away.
     */
    protected final ReadMetrics readMetrics = new ReadMetrics();

    /**
     * Reads data source backing this shard; consulted by {@link #close()} and
     * {@link #getReadProperties()}.
     */
    private final SAMDataSource readsDataSource;

    /**
     * The data backing the next chunks to deliver to the traversal engine, keyed by reader.
     */
    private final Map<SAMReaderID,SAMFileSpan> fileSpans;

    /**
     * Whether the current location is unmapped.
     */
    private final boolean isUnmapped;

    /**
     * Stores the supplied components as-is; no validation or copying is performed.
     * @param parser Parser used to create the spanning genome location.
     * @param shardType Type of this shard (read or locus).
     * @param locs Locations covered by the shard.
     * @param readsDataSource Source of reads, read properties and metrics aggregation.
     * @param fileSpans File spans, per reader, delimiting the data of this shard.
     * @param isUnmapped True if this shard points to an unmapped region.
     */
    public Shard(GenomeLocParser parser,
                 ShardType shardType,
                 List<GenomeLoc> locs,
                 SAMDataSource readsDataSource,
                 Map<SAMReaderID,SAMFileSpan> fileSpans,
                 boolean isUnmapped) {
        this.parser = parser;
        this.shardType = shardType;
        this.locs = locs;
        this.readsDataSource = readsDataSource;
        this.fileSpans = fileSpans;
        this.isUnmapped = isUnmapped;
    }

    /**
     * Whether this shard points to an unmapped region.
     * Some shard types cannot conceptually be unmapped (e.g. LocusShards).  In
     * this case, isUnmapped should always return false.
     * @return True if this shard is unmapped.  False otherwise.
     */
    public boolean isUnmapped() {
        return this.isUnmapped;
    }

    /**
     * If isUnmapped is true, then getGenomeLocs by
     * definition will return a singleton list with a GenomeLoc.UNMAPPED.
     *
     * Can return null, indicating that the entire genome is covered.
     *
     * @return the genome locations represented by this shard
     */
    public List<GenomeLoc> getGenomeLocs() {
        return this.locs;
    }

    /**
     * Get the file spans delimiting this shard.
     * @return an unmodifiable view of the per-reader spans that contain data for this shard.
     */
    public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
        return Collections.unmodifiableMap(this.fileSpans);
    }

    /**
     * Returns the span of the genomeLocs comprising this shard.
     * A null location list yields GenomeLoc.WHOLE_GENOME; the first unmapped location
     * encountered is returned unchanged.  Otherwise the result runs from the smallest
     * start to the largest stop, on the contig of the last location in the list.
     * @return the location spanning every interval of this shard.
     */
    public GenomeLoc getLocation() {
        final List<GenomeLoc> intervals = getGenomeLocs();
        if ( intervals == null ) {
            return GenomeLoc.WHOLE_GENOME;
        }

        String contigName = null;
        int spanStart = Integer.MAX_VALUE;
        int spanStop = Integer.MIN_VALUE;
        for ( final GenomeLoc interval : intervals ) {
            if ( GenomeLoc.isUnmapped(interval) ) {
                // special case the unmapped region marker, just abort out
                return interval;
            }
            contigName = interval.getContig();
            spanStart = Math.min(spanStart, interval.getStart());
            spanStop = Math.max(spanStop, interval.getStop());
        }

        return parser.createGenomeLoc(contigName, spanStart, spanStop);
    }

    /**
     * What kind of shard is this?
     * @return ShardType, indicating the type
     */
    public ShardType getShardType() {
        return this.shardType;
    }

    /**
     * Does any releasing / aggregation required when the shard is through being processed.
     * Here that means handing this shard's read metrics to the reads data source.
     */
    public void close() {
        readsDataSource.incorporateReadMetrics(readMetrics);
    }

    /**
     * Gets key read validation and filtering properties.
     * @return the read properties reported by the backing reads data source.
     */
    public ReadProperties getReadProperties() {
        return readsDataSource.getReadsInfo();
    }

    /**
     * Gets the runtime metrics associated with this shard.
     * Retrieves a storage space of metrics about number of reads included, filtered, etc.
     * @return Storage space for metrics.
     */
    public ReadMetrics getReadMetrics() {
        return this.readMetrics;
    }

    /**
     * Returns true if this shard is meant to buffer reads, rather
     * than just holding pointers to their locations.
     * @return True if this shard can buffer reads.  False otherwise (the default here).
     */
    public boolean buffersReads() {
        return false;
    }

    /**
     * Returns true if the read buffer is currently empty.
     * @return True if this shard's buffer is empty (and the shard can buffer reads).
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public boolean isBufferEmpty() {
        throw new UnsupportedOperationException(NOT_BUFFERING_MESSAGE);
    }

    /**
     * Returns true if the read buffer is currently full.
     * @return True if this shard's buffer is full (and the shard can buffer reads).
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public boolean isBufferFull() {
        throw new UnsupportedOperationException(NOT_BUFFERING_MESSAGE);
    }

    /**
     * Adds a read to the read buffer.
     * @param read Add a read to the internal shard buffer.
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public void addRead(SAMRecord read) {
        throw new UnsupportedOperationException(NOT_BUFFERING_MESSAGE);
    }

    /**
     * Gets the iterator over the elements cached in the shard.
     * @return iterator over the buffered reads.
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public StingSAMIterator iterator() {
        throw new UnsupportedOperationException(NOT_BUFFERING_MESSAGE);
    }
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import java.util.Iterator;
/**

View File

@ -1,11 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
/**
*

View File

@ -0,0 +1,4 @@
/**
* Efficiently queries BAM read storage files by genomic location.
*/
package org.broadinstitute.sting.gatk.datasources.reads;

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.utilities;
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.utilities;
package org.broadinstitute.sting.gatk.datasources.reads.utilities;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;

View File

@ -0,0 +1,4 @@
/**
* Standalone utilities for working with BAM files.
*/
package org.broadinstitute.sting.gatk.datasources.reads.utilities;

View File

@ -23,14 +23,13 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
package org.broadinstitute.sting.gatk.datasources.reference;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import net.sf.picard.reference.FastaSequenceIndexBuilder;
import net.sf.picard.sam.CreateSequenceDictionary;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.FastaSequenceIndex;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.file.FSLockWithShared;
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
package org.broadinstitute.sting.gatk.datasources.reference;
public interface ReferenceDataSourceProgressListener {
public void percentProgress(int percent);

View File

@ -0,0 +1,4 @@
/**
* Efficiently queries FASTA reference sequence files by genomic location.
*/
package org.broadinstitute.sting.gatk.datasources.reference;

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;
/**
 * Marker interface that represents an arbitrary consecutive segment within a data stream.
 * It declares no members; implementing it only tags a type as a stream segment
 * (for example EntireStream and MappedStreamSegment in this package).
 */
interface DataStreamSegment {
}

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;
/**
 * Models the entire stream of data.
 * The class carries no state or behavior; the type itself is the only information conveyed.
 */
class EntireStream implements DataStreamSegment {
}

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
/**
* Models a mapped position within a stream of GATK input data.
*/
class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation {
    /**
     * The locus supplied at construction; also available through {@link #getLocation()}.
     */
    public final GenomeLoc locus;

    /**
     * Creates a segment anchored at the given locus.
     * @param locus Location to report for this segment; stored as-is, without validation.
     */
    public MappedStreamSegment(GenomeLoc locus) {
        this.locus = locus;
    }

    /**
     * Retrieves the first location covered by a mapped stream segment.
     * @return Location of the first base in this segment.
     */
    public GenomeLoc getLocation() {
        return this.locus;
    }
}

View File

@ -0,0 +1,152 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.List;
/**
* A pool of reference-ordered data iterators.
*/
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
    /**
     * Descriptor of the reference-ordered data; passed to the track builder to open a track.
     */
    private final RMDTriplet fileDescriptor;

    /**
     * Our tribble track builder.
     */
    private final RMDTrackBuilder builder;

    /**
     * The header from this RMD, if present.
     */
    private final Object header;

    /**
     * The sequence dictionary from this ROD.  If no sequence dictionary is present, this dictionary will be the same as the reference's.
     */
    private final SAMSequenceDictionary sequenceDictionary;

    /**
     * When true, every iterator created by this pool is wrapped in a FlashBackIterator.
     */
    boolean flashbackData = false;

    /**
     * Builds the pool and immediately seeds it with one iterator, from which the header and
     * sequence dictionary are taken.
     * @param fileDescriptor Descriptor of the data to open.
     * @param builder Builder used to create tracks from the descriptor.
     * @param sequenceDictionary Dictionary forwarded to the ResourcePool constructor.
     * @param genomeLocParser Parser forwarded to the ResourcePool constructor.
     * @param flashbackData Whether created iterators should be wrapped in a FlashBackIterator.
     */
    public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser,boolean flashbackData) {
        super(sequenceDictionary,genomeLocParser);
        this.fileDescriptor = fileDescriptor;
        this.builder = builder;
        // Must be set before createNewResource() runs, since that method reads the flag.
        this.flashbackData = flashbackData;

        // prepopulate one RMDTrack
        final LocationAwareSeekableRODIterator seedIterator = createNewResource();
        this.addNewResource(seedIterator);

        // Pull the proper header and sequence dictionary from the prepopulated track.
        this.header = seedIterator.getHeader();
        this.sequenceDictionary = seedIterator.getSequenceDictionary();
    }

    /**
     * Gets the header used by this resource pool.
     * @return Header taken from the iterator created at construction.
     */
    public Object getHeader() {
        return this.header;
    }

    /**
     * Gets the sequence dictionary built into the ROD index file.
     * @return Sequence dictionary taken from the iterator created at construction.
     */
    public SAMSequenceDictionary getSequenceDictionary() {
        return this.sequenceDictionary;
    }

    /**
     * Create a new iterator from the existing reference-ordered data.  This new iterator is expected
     * to be completely independent of any other iterator.
     * @return The newly created resource, wrapped in a FlashBackIterator when flashbackData is set.
     * @throws ReviewedStingException if numIterators() reports that an iterator already exists.
     */
    public LocationAwareSeekableRODIterator createNewResource() {
        if(numIterators() > 0) {
            throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
        }

        final RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
        final LocationAwareSeekableRODIterator plainIterator = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator());

        if(flashbackData) {
            return new FlashBackIterator(plainIterator);
        }
        return plainIterator;
    }

    /**
     * Finds the best existing ROD iterator from the pool.  In this case, the best existing ROD is defined as
     * the first one encountered that is at or before the given position, or that can flash back to it.
     * @param segment The data over which the resource is required.
     * @param resources Candidate iterators to evaluate, in order.
     * @return The chosen iterator, or null if none qualifies (always null for an EntireStream segment).
     * @throws ReviewedStingException if the segment is neither a MappedStreamSegment nor an EntireStream.
     */
    public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
        if(!(segment instanceof MappedStreamSegment)) {
            if(segment instanceof EntireStream) {
                // Asking for a segment over the entire stream, so by definition, there is no best existing resource.
                // Force the system to create a new one.
                return null;
            }
            throw new ReviewedStingException("Unable to find a ROD iterator for segments of type " + segment.getClass());
        }

        final GenomeLoc target = ((MappedStreamSegment)segment).getLocation();
        for( final LocationAwareSeekableRODIterator candidate : resources ) {
            // Either the iterator has no position yet but still has data, or it sits before the target.
            final boolean unpositionedWithData = candidate.position() == null && candidate.hasNext();
            if( unpositionedWithData || (candidate.position() != null && candidate.position().isBefore(target)) ) {
                return candidate;
            }

            // Otherwise a positioned flashback iterator may be able to rewind to the target.
            if( candidate.position() != null && candidate instanceof FlashBackIterator && ((FlashBackIterator)candidate).canFlashBackTo(target) ) {
                ((FlashBackIterator)candidate).flashBackTo(target);
                return candidate;
            }
        }
        return null;
    }

    /**
     * In this case, the iterator is the resource.  Pass it through.
     * @param segment Requested segment; not used here.
     * @param resource The pooled iterator.
     * @return The same iterator that was passed in.
     */
    public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
        return resource;
    }

    /**
     * Kill the buffers in the iterator.  Only FlashBackIterator instances are closed;
     * any other resource is left untouched.
     * @param resource The iterator being released.
     */
    public void closeResource( LocationAwareSeekableRODIterator resource ) {
        if(resource instanceof FlashBackIterator) {
            final FlashBackIterator flashBack = (FlashBackIterator)resource;
            flashBack.close();
        }
    }
}

View File

@ -1,42 +1,49 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
/**
* User: hanna
* Date: May 21, 2009
* Time: 10:04:12 AM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* A data source which provides a single type of reference-ordered data.
*/
public class ReferenceOrderedDataSource implements SimpleDataSource {
public class ReferenceOrderedDataSource {
/**
* The reference-ordered data itself.
*/
@ -102,7 +109,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
}
public Class getType() {
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
}
public Class getRecordType() {
@ -114,7 +121,7 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
}
public Object getHeader() {
return header;
return header;
}
/**
@ -170,120 +177,6 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
}
/**
* A pool of reference-ordered data iterators.
*/
class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIterator, LocationAwareSeekableRODIterator> {
    /**
     * Descriptor of the reference-ordered data; passed to the track builder to open a track.
     */
    private final RMDTriplet fileDescriptor;

    /**
     * Our tribble track builder.
     */
    private final RMDTrackBuilder builder;

    /**
     * The header from this RMD, if present.
     */
    private final Object header;

    /**
     * The sequence dictionary from this ROD.  If no sequence dictionary is present, this dictionary will be the same as the reference's.
     */
    private final SAMSequenceDictionary sequenceDictionary;

    /**
     * When true, every iterator created by this pool is wrapped in a FlashBackIterator.
     */
    boolean flashbackData = false;

    /**
     * Builds the pool and immediately seeds it with one iterator, from which the header and
     * sequence dictionary are taken.
     * @param fileDescriptor Descriptor of the data to open.
     * @param builder Builder used to create tracks from the descriptor.
     * @param sequenceDictionary Dictionary forwarded to the ResourcePool constructor.
     * @param genomeLocParser Parser forwarded to the ResourcePool constructor.
     * @param flashbackData Whether created iterators should be wrapped in a FlashBackIterator.
     */
    public ReferenceOrderedDataPool(RMDTriplet fileDescriptor,RMDTrackBuilder builder,SAMSequenceDictionary sequenceDictionary,GenomeLocParser genomeLocParser,boolean flashbackData) {
        super(sequenceDictionary,genomeLocParser);
        this.fileDescriptor = fileDescriptor;
        this.builder = builder;
        // Must be set before createNewResource() runs, since that method reads the flag.
        this.flashbackData = flashbackData;

        // prepopulate one RMDTrack
        final LocationAwareSeekableRODIterator seedIterator = createNewResource();
        this.addNewResource(seedIterator);

        // Pull the proper header and sequence dictionary from the prepopulated track.
        this.header = seedIterator.getHeader();
        this.sequenceDictionary = seedIterator.getSequenceDictionary();
    }

    /**
     * Gets the header used by this resource pool.
     * @return Header taken from the iterator created at construction.
     */
    public Object getHeader() {
        return this.header;
    }

    /**
     * Gets the sequence dictionary built into the ROD index file.
     * @return Sequence dictionary taken from the iterator created at construction.
     */
    public SAMSequenceDictionary getSequenceDictionary() {
        return this.sequenceDictionary;
    }

    /**
     * Create a new iterator from the existing reference-ordered data.  This new iterator is expected
     * to be completely independent of any other iterator.
     * @return The newly created resource, wrapped in a FlashBackIterator when flashbackData is set.
     * @throws ReviewedStingException if numIterators() reports that an iterator already exists.
     */
    public LocationAwareSeekableRODIterator createNewResource() {
        if(numIterators() > 0) {
            throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
        }

        final RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
        final LocationAwareSeekableRODIterator plainIterator = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator());

        if(flashbackData) {
            return new FlashBackIterator(plainIterator);
        }
        return plainIterator;
    }

    /**
     * Finds the best existing ROD iterator from the pool.  In this case, the best existing ROD is defined as
     * the first one encountered that is at or before the given position, or that can flash back to it.
     * @param segment The data over which the resource is required.
     * @param resources Candidate iterators to evaluate, in order.
     * @return The chosen iterator, or null if none qualifies (always null for an EntireStream segment).
     * @throws ReviewedStingException if the segment is neither a MappedStreamSegment nor an EntireStream.
     */
    public LocationAwareSeekableRODIterator selectBestExistingResource( DataStreamSegment segment, List<LocationAwareSeekableRODIterator> resources ) {
        if(!(segment instanceof MappedStreamSegment)) {
            if(segment instanceof EntireStream) {
                // Asking for a segment over the entire stream, so by definition, there is no best existing resource.
                // Force the system to create a new one.
                return null;
            }
            throw new ReviewedStingException("Unable to find a ROD iterator for segments of type " + segment.getClass());
        }

        final GenomeLoc target = ((MappedStreamSegment)segment).getLocation();
        for( final LocationAwareSeekableRODIterator candidate : resources ) {
            // Either the iterator has no position yet but still has data, or it sits before the target.
            final boolean unpositionedWithData = candidate.position() == null && candidate.hasNext();
            if( unpositionedWithData || (candidate.position() != null && candidate.position().isBefore(target)) ) {
                return candidate;
            }

            // Otherwise a positioned flashback iterator may be able to rewind to the target.
            if( candidate.position() != null && candidate instanceof FlashBackIterator && ((FlashBackIterator)candidate).canFlashBackTo(target) ) {
                ((FlashBackIterator)candidate).flashBackTo(target);
                return candidate;
            }
        }
        return null;
    }

    /**
     * In this case, the iterator is the resource.  Pass it through.
     * @param segment Requested segment; not used here.
     * @param resource The pooled iterator.
     * @return The same iterator that was passed in.
     */
    public LocationAwareSeekableRODIterator createIteratorFromResource( DataStreamSegment segment, LocationAwareSeekableRODIterator resource ) {
        return resource;
    }

    /**
     * Kill the buffers in the iterator.  Only FlashBackIterator instances are closed;
     * any other resource is left untouched.
     * @param resource The iterator being released.
     */
    public void closeResource( LocationAwareSeekableRODIterator resource ) {
        if(resource instanceof FlashBackIterator) {
            final FlashBackIterator flashBack = (FlashBackIterator)resource;
            flashBack.close();
        }
    }
}
/**
* a data pool for the new query based RODs
*/
@ -356,6 +249,4 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
protected void closeResource(RMDTrack track) {
// Releasing a pooled track consists solely of closing it.
track.close();
}
}
}

View File

@ -1,28 +1,38 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* User: hanna
* Date: May 21, 2009
* Time: 10:55:26 AM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
* Software and documentation are copyright 2005 by the Broad Institute.
* All rights are reserved.
*
* Users acknowledge that this software is supplied without any warranty or support.
* The Broad Institute is not responsible for its use, misuse, or
* functionality.
*/
/**
* A pool of open resources, all of which can create a closeable iterator.
@ -37,7 +47,7 @@ abstract class ResourcePool <T,I extends Iterator> {
* Builder/parser for GenomeLocs.
*/
protected final GenomeLocParser genomeLocParser;
/**
* All iterators of this reference-ordered data.
*/
@ -89,7 +99,7 @@ abstract class ResourcePool <T,I extends Iterator> {
// Make a note of this assignment for proper releasing later.
resourceAssignments.put(iterator, selectedResource);
}
return iterator;
}
@ -133,7 +143,7 @@ abstract class ResourcePool <T,I extends Iterator> {
protected abstract T createNewResource();
/**
* Find the most appropriate resource to acquire the specified data.
* Find the most appropriate resource to acquire the specified data.
* @param segment The data over which the resource is required.
* @param availableResources A list of candidate resources to evaluate.
* @return The best choice of the availableResources, or null if no resource meets the criteria.
@ -175,33 +185,3 @@ abstract class ResourcePool <T,I extends Iterator> {
}
/**
 * Marker interface that represents an arbitrary consecutive segment within a data stream.
 * It declares no members; implementing it only tags a type as a stream segment.
 */
interface DataStreamSegment {
}
/**
 * Models the entire stream of data.
 * The class carries no state or behavior; the type itself is the only information conveyed.
 */
class EntireStream implements DataStreamSegment {
}
/**
* Models a mapped position within a stream of GATK input data.
*/
class MappedStreamSegment implements DataStreamSegment, HasGenomeLocation {
    /**
     * The locus supplied at construction; also available through {@link #getLocation()}.
     */
    public final GenomeLoc locus;

    /**
     * Creates a segment anchored at the given locus.
     * @param locus Location to report for this segment; stored as-is, without validation.
     */
    public MappedStreamSegment(GenomeLoc locus) {
        this.locus = locus;
    }

    /**
     * Retrieves the first location covered by a mapped stream segment.
     * @return Location of the first base in this segment.
     */
    public GenomeLoc getLocation() {
        return this.locus;
    }
}

View File

@ -0,0 +1,4 @@
/**
* Efficiently queries arbitrary files sorted according to reference coordinate order.
*/
package org.broadinstitute.sting.gatk.datasources.rmd;

View File

@ -1,117 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import net.sf.samtools.*;
import net.sf.picard.filter.SamRecordFilter;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
 * Common base for shards that natively understand the BAM format.
 *
 * The buffering operations declared here are unsupported in this base class:
 * {@link #buffersReads()} reports false and the remaining buffer methods throw
 * {@link UnsupportedOperationException}.
 *
 * @author mhanna
 * @version 0.1
 */
public abstract class BAMFormatAwareShard extends Shard {
    /** Message carried by every exception thrown from the unsupported buffering operations. */
    private static final String NO_BUFFERING_MESSAGE = "This shard does not buffer reads.";

    /**
     * Whether the current location is unmapped.
     */
    private final boolean isUnmapped;

    /**
     * Reads data, if applicable.
     */
    private final SAMDataSource readsDataSource;

    /**
     * The data backing the next chunks to deliver to the traversal engine.
     */
    private final Map<SAMReaderID,SAMFileSpan> fileSpans;

    /**
     * Creates a BAM-aware shard.
     * @param parser Genome loc parser, passed through to the base shard.
     * @param shardType Type of shard (read or locus), passed through to the base shard.
     * @param locs Locations covered by the shard, passed through to the base shard.
     * @param readsDataSource Source of the reads; also receives this shard's metrics on close.
     * @param fileSpans Per-reader file spans backing this shard; the map is stored by reference, not copied.
     * @param isUnmapped Whether this shard points to an unmapped region.
     */
    public BAMFormatAwareShard(GenomeLocParser parser,
                               ShardType shardType,
                               List<GenomeLoc> locs,
                               SAMDataSource readsDataSource,
                               Map<SAMReaderID,SAMFileSpan> fileSpans,
                               boolean isUnmapped) {
        super(parser, shardType, locs);
        this.readsDataSource = readsDataSource;
        this.fileSpans = fileSpans;
        this.isUnmapped = isUnmapped;
    }

    /**
     * Closes the shard, handing the read metrics tallied by this shard to the reads data source.
     */
    @Override
    public void close() {
        readsDataSource.incorporateReadMetrics(readMetrics);
    }

    /**
     * Get the file spans delimiting this shard.
     * @return a read-only view of the per-reader file spans that contain data for this shard.
     */
    public Map<SAMReaderID,SAMFileSpan> getFileSpans() {
        return Collections.unmodifiableMap(fileSpans);
    }

    /**
     * Gets key read validation and filtering properties.
     * @return the read properties reported by the reads data source backing this shard.
     */
    @Override
    public ReadProperties getReadProperties() {
        return readsDataSource.getReadsInfo();
    }

    /**
     * Returns true if this shard is meant to buffer reads, rather
     * than just holding pointers to their locations.
     * @return True if this shard can buffer reads. This base implementation always returns false.
     */
    public boolean buffersReads() {
        return false;
    }

    /**
     * Returns true if the read buffer is currently empty.
     * @return True if this shard's buffer is empty (and the shard can buffer reads).
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public boolean isBufferEmpty() {
        throw new UnsupportedOperationException(NO_BUFFERING_MESSAGE);
    }

    /**
     * Returns true if the read buffer is currently full.
     * @return True if this shard's buffer is full (and the shard can buffer reads).
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public boolean isBufferFull() {
        throw new UnsupportedOperationException(NO_BUFFERING_MESSAGE);
    }

    /**
     * Adds a read to the read buffer.
     * @param read The read to add to the internal shard buffer.
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public void addRead(SAMRecord read) {
        throw new UnsupportedOperationException(NO_BUFFERING_MESSAGE);
    }

    /**
     * Gets the iterator over the elements cached in the shard.
     * @return an iterator over the buffered reads.
     * @throws UnsupportedOperationException always, in this base implementation.
     */
    public StingSAMIterator iterator() {
        throw new UnsupportedOperationException(NO_BUFFERING_MESSAGE);
    }

    /**
     * Whether this shard points to an unmapped region.
     * Some shard types can never conceptually be unmapped (e.g. LocusShards). In
     * this case, isUnmapped should always return false.
     * @return True if this shard is unmapped. False otherwise.
     */
    public boolean isUnmapped() {
        return isUnmapped;
    }
}

View File

@ -1,132 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.Serializable;
import java.util.List;
/**
*
* User: aaron
* Date: Apr 10, 2009
* Time: 5:00:27 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
 * The base abstract class for shards.
 * <p/>
 * Holds the shard's type, the genome locations it covers, and a per-shard
 * store of read metrics.
 *
 * @author aaron
 * @version 1.0
 */
public abstract class Shard implements HasGenomeLocation {
    /** The kinds of shard: read-based or locus-based. */
    public enum ShardType {
        READ, LOCUS
    }

    protected final GenomeLocParser parser; // incredibly annoying!

    /**
     * What type of shard is this? Read or locus?
     */
    protected final ShardType shardType;

    /**
     * Locations. For the monolithic shard, should be a list of all available contigs in the reference.
     */
    protected final List<GenomeLoc> locs;

    /**
     * Statistics about which reads in this shard were used and which were filtered away.
     */
    protected final ReadMetrics readMetrics = new ReadMetrics();

    /**
     * Creates a shard over the given locations.
     * @param parser Parser used by {@link #getLocation()} to build the spanning location.
     * @param shardType Whether this is a read or a locus shard.
     * @param locs Locations covered by this shard; stored by reference and may be null.
     */
    public Shard(GenomeLocParser parser, ShardType shardType, List<GenomeLoc> locs) {
        this.parser = parser;
        this.shardType = shardType;
        this.locs = locs;
    }

    /**
     * If isUnmapped is true, then getGenomeLocs by
     * definition will return a singleton list with a GenomeLoc.UNMAPPED
     *
     * Can return null, indicating that the entire genome is covered.
     *
     * @return the genome locations represented by this shard
     */
    public List<GenomeLoc> getGenomeLocs() {
        return locs;
    }

    /**
     * Returns the span of the genomeLocs comprising this shard.
     * <p/>
     * The span runs from the smallest start to the largest stop over all locations,
     * on the contig of the last location visited.
     * NOTE(review): with an empty list the parser is called with a null contig and
     * start > stop -- confirm that callers never supply an empty list.
     *
     * @return GenomeLoc.WHOLE_GENOME when the location list is null; the first unmapped
     *         location encountered, if any; otherwise the location spanning all entries.
     */
    public GenomeLoc getLocation() {
        if (getGenomeLocs() == null) {
            return GenomeLoc.WHOLE_GENOME;
        }

        int minStart = Integer.MAX_VALUE;
        int maxStop = Integer.MIN_VALUE;
        String contigName = null;

        for (GenomeLoc current : getGenomeLocs()) {
            // special case the unmapped region marker, just abort out
            if (GenomeLoc.isUnmapped(current)) {
                return current;
            }
            contigName = current.getContig();
            minStart = Math.min(minStart, current.getStart());
            maxStop = Math.max(maxStop, current.getStop());
        }

        return parser.createGenomeLoc(contigName, minStart, maxStop);
    }

    /**
     * what kind of shard do we return
     * @return ShardType, indicating the type
     */
    public ShardType getShardType() {
        return shardType;
    }

    /**
     * Does any releasing / aggregation required when the shard is through being processed.
     */
    public void close() {
        // by default don't do anything
    }

    /**
     * Gets required configuration for validating and filtering reads.
     * @return read configuration properties.
     */
    public abstract ReadProperties getReadProperties();

    /**
     * Gets the runtime metrics associated with this shard.
     * Retrieves a storage space of metrics about number of reads included, filtered, etc.
     * @return Storage space for metrics.
     */
    public ReadMetrics getReadMetrics() {
        return readMetrics;
    }
}

View File

@ -1,6 +0,0 @@
/**
* Divides core input data into manageable blocks, each representing
* a subsection of data that is small enough to be processed by a single
* worker.
*/
package org.broadinstitute.sting.gatk.datasources.shards;

View File

@ -1,37 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import java.io.Serializable;
import java.util.Iterator;
/**
* User: aaron
* Date: Mar 26, 2009
* Time: 2:39:05 PM
* <p/>
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/** Common contract implemented by all data sources. */
public interface SimpleDataSource extends Serializable {
    /**
     * Query the data source for a region of interest, specified by a shard.
     * Successive calls on the returned iterator yield the data within that region.
     *
     * @param shard the region
     * @return an iterator of the appropriate type, that is limited by the region
     */
    Iterator seek(Shard shard);
}

View File

@ -1,6 +0,0 @@
/**
* Retrieves core GATK data, given a shard which represents a contiguous
* subsequence of core GATK data (reads, reference, or reference-ordered
* data).
*/
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;

View File

@ -1,20 +1,17 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import javax.management.JMException;
import java.util.Queue;
import java.util.LinkedList;
import java.util.Collection;
@ -22,7 +19,6 @@ import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.lang.management.ManagementFactory;
import net.sf.picard.reference.IndexedFastaSequenceFile;

View File

@ -3,10 +3,10 @@ package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;

View File

@ -26,10 +26,10 @@
package org.broadinstitute.sting.gatk.executive;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -40,13 +40,11 @@ import org.broadinstitute.sting.gatk.ReadMetrics;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.management.ManagementFactory;
import java.util.*;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.*;

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.executive;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.Walker;

View File

@ -1,12 +1,10 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.traversals;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordCoordinateComparator;

View File

@ -36,7 +36,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;

View File

@ -43,7 +43,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;

View File

@ -40,7 +40,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec;
import org.broadinstitute.sting.gatk.walkers.*;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;

View File

@ -33,10 +33,9 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.RMD;

View File

@ -31,7 +31,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.commandline.*;

View File

@ -31,6 +31,8 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
@ -38,13 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.interval.IntervalFileMergingIterator;

View File

@ -33,6 +33,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.exceptions.StingException;
@ -45,7 +46,6 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;

View File

@ -33,10 +33,10 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.SimpleTimer;
import java.io.PrintStream;

View File

@ -32,7 +32,7 @@ import org.broad.tribble.vcf.VCFCodec;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;

View File

@ -33,7 +33,7 @@ import java.util.regex.Pattern;
import net.sf.samtools.*;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.report.GATKReport;

View File

@ -30,7 +30,7 @@ import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.RodWalker;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;

View File

@ -31,10 +31,8 @@ import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.oneoffprojects.utils.ReadPair;
import org.broadinstitute.sting.oneoffprojects.utils.AlignmentInfo;
import org.broadinstitute.sting.oneoffprojects.utils.Assembly;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import net.sf.samtools.SAMRecord;

View File

@ -7,9 +7,8 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;

View File

@ -29,7 +29,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;

View File

@ -3,10 +3,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.*;

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;

View File

@ -25,14 +25,10 @@
package org.broadinstitute.sting.utils.vcf;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.apache.log4j.Logger;

View File

@ -27,7 +27,7 @@ package net.sf.picard.reference;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

View File

@ -1,12 +1,11 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;

View File

@ -1,18 +1,16 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLoc;

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;

View File

@ -22,11 +22,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.shards;
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.List;

View File

@ -1,13 +1,15 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -22,7 +24,6 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.Collections;
/**
*

View File

@ -1,9 +1,8 @@
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
package org.broadinstitute.sting.gatk.datasources.rmd;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
@ -19,7 +18,6 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Collections;
import net.sf.picard.reference.IndexedFastaSequenceFile;
/**

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;

View File

@ -8,11 +8,11 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -31,7 +31,6 @@ import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Collections;
/**
*
@ -119,7 +118,7 @@ public class TraverseReadsUnitTest extends BaseTest {
@Test
public void testUnmappedReadCount() {
SAMDataSource dataSource = new SAMDataSource(bamList,genomeLocParser);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref, ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
ref.getSequenceDictionary(),
readSize,
genomeLocParser);