Changes to fix and test the interval-based traversals
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1095 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3b24264c2b
commit
5b1c23a7f2
|
|
@ -45,7 +45,7 @@ import java.util.List;
|
||||||
* User: aaron
|
* User: aaron
|
||||||
* Date: Mar 26, 2009
|
* Date: Mar 26, 2009
|
||||||
* Time: 2:36:16 PM
|
* Time: 2:36:16 PM
|
||||||
*
|
* <p/>
|
||||||
* Converts shards to SAM iterators over the specified region
|
* Converts shards to SAM iterators over the specified region
|
||||||
*/
|
*/
|
||||||
public class SAMDataSource implements SimpleDataSource {
|
public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
@ -77,11 +77,8 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* constructor, given sam files
|
* constructor, given sam files
|
||||||
*
|
*
|
||||||
* @param reads the list of sam files
|
* @param reads the list of sam files
|
||||||
* @param byReads are we a by reads traversal, or a loci traversal. We could delete this field
|
|
||||||
* if we passed in iterGen, which would be a better (although more complicated for the
|
|
||||||
* consumers of SAMDataSources).
|
|
||||||
*/
|
*/
|
||||||
public SAMDataSource( Reads reads, boolean byReads ) throws SimpleDataSourceLoadException {
|
public SAMDataSource( Reads reads ) throws SimpleDataSourceLoadException {
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
|
|
||||||
// check the length
|
// check the length
|
||||||
|
|
@ -93,7 +90,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
iteratorPool = new SAMIteratorPool(reads, byReads);
|
iteratorPool = new SAMIteratorPool(reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -116,19 +113,20 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return an iterator for that region
|
* @return an iterator for that region
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
||||||
return iteratorPool.iterator( new MappedStreamSegment(location) );
|
return iteratorPool.iterator(new MappedStreamSegment(location));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
|
||||||
* seek
|
|
||||||
* </p>
|
|
||||||
*
|
*
|
||||||
* @param shard the shard to get data for
|
* @param shard the shard to get data for
|
||||||
*
|
*
|
||||||
* @return an iterator for that region
|
* @return an iterator for that region
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
|
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
|
||||||
|
// tell the iterator pool which query mode to use: contained reads for READ shards, overlapping reads for all other shard types
|
||||||
|
boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true;
|
||||||
|
iteratorPool.setQueryOverlapping(queryOverlapping);
|
||||||
|
|
||||||
StingSAMIterator iterator = null;
|
StingSAMIterator iterator = null;
|
||||||
if (shard.getShardType() == Shard.ShardType.READ) {
|
if (shard.getShardType() == Shard.ShardType.READ) {
|
||||||
iterator = seekRead((ReadShard) shard);
|
iterator = seekRead((ReadShard) shard);
|
||||||
|
|
@ -186,15 +184,15 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iteratorPool.iterator(new MappedStreamSegment(lastReadPos))));
|
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iteratorPool.iterator(new MappedStreamSegment(lastReadPos))));
|
||||||
}
|
}
|
||||||
|
|
||||||
if( intoUnmappedReads && !includeUnmappedReads )
|
if (intoUnmappedReads && !includeUnmappedReads)
|
||||||
shard.signalDone();
|
shard.signalDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (intoUnmappedReads && includeUnmappedReads) {
|
if (intoUnmappedReads && includeUnmappedReads) {
|
||||||
if( iter != null )
|
if (iter != null)
|
||||||
iter.close();
|
iter.close();
|
||||||
iter = toUnmappedReads( shard.getSize() );
|
iter = toUnmappedReads(shard.getSize());
|
||||||
if( !iter.hasNext() )
|
if (!iter.hasNext())
|
||||||
shard.signalDone();
|
shard.signalDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -215,11 +213,13 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve unmapped reads.
|
* Retrieve unmapped reads.
|
||||||
|
*
|
||||||
* @param readCount how many reads to retrieve
|
* @param readCount how many reads to retrieve
|
||||||
|
*
|
||||||
* @return the bounded iterator that you can use to get the intervaled reads from
|
* @return the bounded iterator that you can use to get the intervaled reads from
|
||||||
*/
|
*/
|
||||||
StingSAMIterator toUnmappedReads( long readCount ) {
|
StingSAMIterator toUnmappedReads( long readCount ) {
|
||||||
StingSAMIterator iter = iteratorPool.iterator( new UnmappedStreamSegment( readsTaken,readCount) );
|
StingSAMIterator iter = iteratorPool.iterator(new UnmappedStreamSegment(readsTaken, readCount));
|
||||||
readsTaken += readCount;
|
readsTaken += readCount;
|
||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
@ -342,7 +342,7 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
protected Reads reads;
|
protected Reads reads;
|
||||||
|
|
||||||
/** Is this a by-reads traversal or a by-locus? */
|
/** Should queries return reads overlapping the segment (true) or only reads contained by it (false)? */
|
||||||
protected boolean byReads;
|
protected boolean queryOverlapping;
|
||||||
|
|
||||||
/** File header for the combined file. */
|
/** File header for the combined file. */
|
||||||
protected SAMFileHeader header;
|
protected SAMFileHeader header;
|
||||||
|
|
@ -350,9 +350,9 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
|
protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
|
||||||
|
|
||||||
public SAMIteratorPool( Reads reads, boolean byReads ) {
|
public SAMIteratorPool( Reads reads ) {
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
this.byReads = byReads;
|
this.queryOverlapping = true;
|
||||||
|
|
||||||
ReadStreamPointer streamPointer = createNewResource();
|
ReadStreamPointer streamPointer = createNewResource();
|
||||||
this.header = streamPointer.getHeader();
|
this.header = streamPointer.getHeader();
|
||||||
|
|
@ -366,8 +366,8 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
||||||
for( ReadStreamPointer pointer: pointers ) {
|
for (ReadStreamPointer pointer : pointers) {
|
||||||
if( pointer.canAccessSegmentEfficiently( segment ) ) {
|
if (pointer.canAccessSegmentEfficiently(segment)) {
|
||||||
return pointer;
|
return pointer;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -375,18 +375,18 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ReadStreamPointer createNewResource() {
|
protected ReadStreamPointer createNewResource() {
|
||||||
return new ReadStreamPointer( reads );
|
return new ReadStreamPointer(reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamPointer streamPointer ) {
|
protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamPointer streamPointer ) {
|
||||||
StingSAMIterator iterator = null;
|
StingSAMIterator iterator = null;
|
||||||
|
|
||||||
if( byReads )
|
if (!queryOverlapping)
|
||||||
iterator = streamPointer.getReadsContainedBy( segment );
|
iterator = streamPointer.getReadsContainedBy(segment);
|
||||||
else {
|
else {
|
||||||
if( !(segment instanceof MappedStreamSegment) )
|
if (!( segment instanceof MappedStreamSegment ))
|
||||||
throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
|
throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
|
||||||
iterator = streamPointer.getReadsOverlapping( (MappedStreamSegment)segment );
|
iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new ReleasingIterator(iterator);
|
return new ReleasingIterator(iterator);
|
||||||
|
|
@ -399,18 +399,41 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
private class ReleasingIterator implements StingSAMIterator {
|
private class ReleasingIterator implements StingSAMIterator {
|
||||||
private final StingSAMIterator wrappedIterator;
|
private final StingSAMIterator wrappedIterator;
|
||||||
|
|
||||||
public Reads getSourceInfo() { return wrappedIterator.getSourceInfo(); }
|
public Reads getSourceInfo() {
|
||||||
|
return wrappedIterator.getSourceInfo();
|
||||||
|
}
|
||||||
|
|
||||||
public ReleasingIterator( StingSAMIterator wrapped ) { this.wrappedIterator = wrapped; }
|
public ReleasingIterator( StingSAMIterator wrapped ) {
|
||||||
|
this.wrappedIterator = wrapped;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ReleasingIterator iterator() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
||||||
|
}
|
||||||
|
|
||||||
public ReleasingIterator iterator() { return this; }
|
|
||||||
public void remove() { throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); }
|
|
||||||
public void close() {
|
public void close() {
|
||||||
wrappedIterator.close();
|
wrappedIterator.close();
|
||||||
release(this);
|
release(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() { return wrappedIterator.hasNext(); }
|
public boolean hasNext() {
|
||||||
public SAMRecord next() { return wrappedIterator.next(); }
|
return wrappedIterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord next() {
|
||||||
|
return wrappedIterator.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isQueryOverlapping() {
|
||||||
|
return queryOverlapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setQueryOverlapping( boolean queryOverlapping ) {
|
||||||
|
this.queryOverlapping = queryOverlapping;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -105,18 +105,14 @@ public abstract class MicroScheduler {
|
||||||
protected MicroScheduler(Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
protected MicroScheduler(Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||||
if (walker instanceof ReadWalker) {
|
if (walker instanceof ReadWalker) {
|
||||||
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
||||||
this.reads = getReadsDataSource(reads, true);
|
|
||||||
} else if (walker instanceof LocusWalker) {
|
} else if (walker instanceof LocusWalker) {
|
||||||
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
|
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
|
||||||
this.reads = getReadsDataSource(reads, false);
|
|
||||||
} else if (walker instanceof DuplicateWalker) {
|
} else if (walker instanceof DuplicateWalker) {
|
||||||
traversalEngine = new TraverseDuplicates(reads.getReadsFiles(), refFile, rods);
|
traversalEngine = new TraverseDuplicates(reads.getReadsFiles(), refFile, rods);
|
||||||
this.reads = getReadsDataSource(reads, true);
|
|
||||||
} else {
|
} else {
|
||||||
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
||||||
}
|
}
|
||||||
|
this.reads = getReadsDataSource(reads);
|
||||||
|
|
||||||
this.reference = openReferenceSequenceFile(refFile);
|
this.reference = openReferenceSequenceFile(refFile);
|
||||||
this.rods = getReferenceOrderedDataSources(rods);
|
this.rods = getReferenceOrderedDataSources(rods);
|
||||||
}
|
}
|
||||||
|
|
@ -209,16 +205,15 @@ public abstract class MicroScheduler {
|
||||||
* Gets a data source for the given set of reads.
|
* Gets a data source for the given set of reads.
|
||||||
*
|
*
|
||||||
* @param reads the read source information
|
* @param reads the read source information
|
||||||
* @param byReads are we a by reads traversal, or not
|
|
||||||
*
|
*
|
||||||
* @return A data source for the given set of reads.
|
* @return A data source for the given set of reads.
|
||||||
*/
|
*/
|
||||||
private SAMDataSource getReadsDataSource(Reads reads, boolean byReads) {
|
private SAMDataSource getReadsDataSource(Reads reads) {
|
||||||
// By reference traversals are happy with no reads. Make sure that case is handled.
|
// By reference traversals are happy with no reads. Make sure that case is handled.
|
||||||
if (reads.getReadsFiles().size() == 0)
|
if (reads.getReadsFiles().size() == 0)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(reads, byReads);
|
SAMDataSource dataSource = new SAMDataSource(reads);
|
||||||
|
|
||||||
// Side effect: initialize the traversal engine with reads data.
|
// Side effect: initialize the traversal engine with reads data.
|
||||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* use this to inject into SAMDataSource for testing
|
||||||
|
*/
|
||||||
|
public class ArtificialResourcePool extends SAMIteratorPool {
|
||||||
|
// How strict should we be with SAM/BAM parsing?
|
||||||
|
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||||
|
|
||||||
|
// the header
|
||||||
|
private SAMFileHeader header;
|
||||||
|
private ArtificialSAMIterator iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Track the iterator to see whether it's venturing into unmapped reads for the first
|
||||||
|
* time. If so, query straight there. Only works for query iterators.
|
||||||
|
*
|
||||||
|
* TODO: Clean up.
|
||||||
|
*/
|
||||||
|
private boolean intoUnmappedReads = false;
|
||||||
|
|
||||||
|
public ArtificialResourcePool( SAMFileHeader header, ArtificialSAMIterator iterator ) {
|
||||||
|
super( new Reads(Collections.<File>emptyList()) );
|
||||||
|
this.header = header;
|
||||||
|
this.iterator = iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StingSAMIterator iterator( DataStreamSegment segment ) {
|
||||||
|
if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) {
|
||||||
|
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||||
|
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
||||||
|
queryIterator.queryContained(mappedSegment.locus.getContig(), (int)mappedSegment.locus.getStart(), (int)mappedSegment.locus.getStop());
|
||||||
|
return queryIterator;
|
||||||
|
}
|
||||||
|
else if (segment instanceof UnmappedStreamSegment) {
|
||||||
|
if( !intoUnmappedReads ) {
|
||||||
|
if( iterator instanceof ArtificialSAMQueryIterator ) {
|
||||||
|
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||||
|
queryIterator.queryUnmappedReads();
|
||||||
|
}
|
||||||
|
intoUnmappedReads = true;
|
||||||
|
}
|
||||||
|
return new BoundedReadIterator(iterator,((UnmappedStreamSegment)segment).size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw new StingException("Unsupported segment type passed to test");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the merged header
|
||||||
|
*
|
||||||
|
* @return the merged header
|
||||||
|
*/
|
||||||
|
public SAMFileHeader getHeader() {
|
||||||
|
return this.header;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -88,7 +88,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
Reads reads = new Reads(fl);
|
Reads reads = new Reads(fl);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(reads,false);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
for (Shard sh : strat) {
|
for (Shard sh : strat) {
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
count++;
|
count++;
|
||||||
|
|
@ -138,7 +138,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(reads,false);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
for (Shard sh : strat) {
|
for (Shard sh : strat) {
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
count++;
|
count++;
|
||||||
|
|
@ -175,7 +175,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
||||||
|
|
||||||
logger.debug("Pile two:");
|
logger.debug("Pile two:");
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(reads,false);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
for (Shard sh : strat) {
|
for (Shard sh : strat) {
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
count++;
|
count++;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,150 @@
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Class SAMByIntervalTest
|
||||||
|
* <p/>
|
||||||
|
* Test that the SAM data source behaves well given intervals
|
||||||
|
*/
|
||||||
|
public class SAMByIntervalTest extends BaseTest {
|
||||||
|
private List<File> fl;
|
||||||
|
ShardStrategy shardStrategy;
|
||||||
|
Reads reads;
|
||||||
|
private int targetReadCount = 14;
|
||||||
|
|
||||||
|
|
||||||
|
// constants we use throughout the tests
|
||||||
|
protected final int READ_COUNT;
|
||||||
|
protected final int ENDING_CHROMO;
|
||||||
|
protected final int STARTING_CHROMO;
|
||||||
|
protected final int UNMAPPED_READ_COUNT;
|
||||||
|
|
||||||
|
public SAMByIntervalTest() {
|
||||||
|
READ_COUNT = 100;
|
||||||
|
ENDING_CHROMO = 10;
|
||||||
|
STARTING_CHROMO = 1;
|
||||||
|
UNMAPPED_READ_COUNT = 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function does the setup of our parser, before each method call.
|
||||||
|
* <p/>
|
||||||
|
* Called before every test case method.
|
||||||
|
*/
|
||||||
|
@Before
|
||||||
|
public void doForEachTest() {
|
||||||
|
fl = new ArrayList<File>();
|
||||||
|
|
||||||
|
// sequence
|
||||||
|
//seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
||||||
|
//GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||||
|
|
||||||
|
// setup the test files
|
||||||
|
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
|
||||||
|
reads = new Reads(fl);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** run a test on data over a specific interval */
|
||||||
|
private void testRead( int start, int stop, int readCount ) {
|
||||||
|
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT),
|
||||||
|
ArtificialSAMUtils.mappedAndUnmappedReadIterator(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT));
|
||||||
|
|
||||||
|
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||||
|
int unmappedReadsSeen = 0;
|
||||||
|
int iterations = 0;
|
||||||
|
|
||||||
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
data.setResourcePool(gen);
|
||||||
|
GenomeLocSortedSet set = new GenomeLocSortedSet();
|
||||||
|
set.add(GenomeLocParser.createGenomeLoc(0, start, stop));
|
||||||
|
ShardStrategy strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, gen.getHeader().getSequenceDictionary(), UNMAPPED_READ_COUNT, set);
|
||||||
|
|
||||||
|
StingSAMIterator iter = data.seek(strat.next());
|
||||||
|
int count = 0;
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
SAMRecord r = iter.next();
|
||||||
|
// uncomment for debugging - System.err.println(r.getAlignmentStart() + " " + r.getAlignmentEnd());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
assertEquals(readCount, count);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test out that we get a single read, given the specific size
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testSingleRead() {
|
||||||
|
testRead(1,ArtificialSAMUtils.DEFAULT_READ_LENGTH,1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test out that we get the expected amount for a whole chromosome
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testChromosome() {
|
||||||
|
testRead(1, READ_COUNT, READ_COUNT -ArtificialSAMUtils.DEFAULT_READ_LENGTH+1); // +1 because we go from 1 up to 101
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test out that we get the expected amount for an interval in the middle of a chromosome
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMiddle() {
|
||||||
|
testRead(20, READ_COUNT-20, READ_COUNT -ArtificialSAMUtils.DEFAULT_READ_LENGTH-40+2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private SAMFileHeader createArtificialSamHeader( int startingChr, int endingChr, int readCount, int readSize ) {
|
||||||
|
return ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1,
|
||||||
|
startingChr,
|
||||||
|
readCount + readSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,19 +1,14 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
import static junit.framework.Assert.fail;
|
import static junit.framework.Assert.fail;
|
||||||
import net.sf.samtools.SAMFileReader;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.*;
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
|
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator;
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
@ -22,7 +17,6 @@ import org.junit.Test;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -78,14 +72,14 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testToUnmappedReads() {
|
public void testToUnmappedReads() {
|
||||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000),
|
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000),
|
||||||
ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000) );
|
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 100, 10, 1000) );
|
||||||
|
|
||||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||||
try {
|
try {
|
||||||
int unmappedReadsSeen = 0;
|
int unmappedReadsSeen = 0;
|
||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
|
|
||||||
SAMDataSource data = new SAMDataSource(reads,true);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
data.setResourcePool(gen);
|
data.setResourcePool(gen);
|
||||||
|
|
||||||
for (int x = 0; x < 10; x++) {
|
for (int x = 0; x < 10; x++) {
|
||||||
|
|
@ -121,7 +115,7 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
try {
|
try {
|
||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
SAMDataSource data = new SAMDataSource(reads,true);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
ArrayList<Integer> readsPerShard = new ArrayList<Integer>();
|
ArrayList<Integer> readsPerShard = new ArrayList<Integer>();
|
||||||
|
|
||||||
|
|
@ -176,7 +170,7 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
try {
|
try {
|
||||||
int iterations = 0;
|
int iterations = 0;
|
||||||
int readCount = 0;
|
int readCount = 0;
|
||||||
SAMDataSource data = new SAMDataSource(reads,true);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
|
|
||||||
data.setResourcePool(gen);
|
data.setResourcePool(gen);
|
||||||
|
|
@ -224,59 +218,3 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* use this to inject into SAMDataSource for testing
|
|
||||||
*/
|
|
||||||
class ArtificialResourcePool extends SAMIteratorPool {
|
|
||||||
// How strict should we be with SAM/BAM parsing?
|
|
||||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
|
||||||
|
|
||||||
// the header
|
|
||||||
private SAMFileHeader header;
|
|
||||||
private ArtificialSAMIterator iterator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Track the iterator to see whether it's venturing into unmapped reads for the first
|
|
||||||
* time. If so, query straight there. Only works for query iterators.
|
|
||||||
*
|
|
||||||
* TODO: Clean up.
|
|
||||||
*/
|
|
||||||
private boolean intoUnmappedReads = false;
|
|
||||||
|
|
||||||
public ArtificialResourcePool( SAMFileHeader header, ArtificialSAMIterator iterator ) {
|
|
||||||
super( new Reads(Collections.<File>emptyList()),true );
|
|
||||||
this.header = header;
|
|
||||||
this.iterator = iterator;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public StingSAMIterator iterator( DataStreamSegment segment ) {
|
|
||||||
if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) {
|
|
||||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
|
||||||
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
|
||||||
queryIterator.queryContained(mappedSegment.locus.getContig(), (int)mappedSegment.locus.getStart(), (int)mappedSegment.locus.getStop());
|
|
||||||
return queryIterator;
|
|
||||||
}
|
|
||||||
else if (segment instanceof UnmappedStreamSegment) {
|
|
||||||
if( !intoUnmappedReads ) {
|
|
||||||
if( iterator instanceof ArtificialSAMQueryIterator ) {
|
|
||||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
|
||||||
queryIterator.queryUnmappedReads();
|
|
||||||
}
|
|
||||||
intoUnmappedReads = true;
|
|
||||||
}
|
|
||||||
return new BoundedReadIterator(iterator,((UnmappedStreamSegment)segment).size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw new StingException("Unsupported segment type passed to test");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the merged header
|
|
||||||
*
|
|
||||||
* @return the merged header
|
|
||||||
*/
|
|
||||||
public SAMFileHeader getHeader() {
|
|
||||||
return this.header;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
||||||
long shardReadCount = 0;
|
long shardReadCount = 0;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
SAMDataSource data = new SAMDataSource(reads,true);
|
SAMDataSource data = new SAMDataSource(reads);
|
||||||
|
|
||||||
// make sure we have a shard
|
// make sure we have a shard
|
||||||
if (!strat.hasNext()) {
|
if (!strat.hasNext()) {
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,7 @@ public class TraverseReadsTest extends BaseTest {
|
||||||
ref.getSequenceDictionary(),
|
ref.getSequenceDictionary(),
|
||||||
readSize);
|
readSize);
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||||
dataSource.viewUnmappedReads(false);
|
dataSource.viewUnmappedReads(false);
|
||||||
|
|
||||||
countReadWalker.initialize();
|
countReadWalker.initialize();
|
||||||
|
|
@ -169,7 +169,7 @@ public class TraverseReadsTest extends BaseTest {
|
||||||
ref.getSequenceDictionary(),
|
ref.getSequenceDictionary(),
|
||||||
readSize);
|
readSize);
|
||||||
|
|
||||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||||
dataSource.viewUnmappedReads(true);
|
dataSource.viewUnmappedReads(true);
|
||||||
|
|
||||||
countReadWalker.initialize();
|
countReadWalker.initialize();
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
@ -21,7 +20,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void basicReadIteratorTest() {
|
public void basicReadIteratorTest() {
|
||||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100);
|
StingSAMIterator iter = ArtificialSAMUtils.mappedReadIterator(1, 100, 100);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
SAMRecord rec = iter.next();
|
SAMRecord rec = iter.next();
|
||||||
|
|
@ -32,7 +31,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void tenPerChromosome() {
|
public void tenPerChromosome() {
|
||||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10);
|
StingSAMIterator iter = ArtificialSAMUtils.mappedReadIterator(1, 100, 10);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
SAMRecord rec = iter.next();
|
SAMRecord rec = iter.next();
|
||||||
|
|
@ -45,7 +44,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void onePerChromosome() {
|
public void onePerChromosome() {
|
||||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 1);
|
StingSAMIterator iter = ArtificialSAMUtils.mappedReadIterator(1, 100, 1);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
SAMRecord rec = iter.next();
|
SAMRecord rec = iter.next();
|
||||||
|
|
@ -58,7 +57,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void basicUnmappedIteratorTest() {
|
public void basicUnmappedIteratorTest() {
|
||||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100, 1000);
|
StingSAMIterator iter = ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 100, 100, 1000);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for (int x = 0; x < (100* 100); x++ ) {
|
for (int x = 0; x < (100* 100); x++ ) {
|
||||||
if (!iter.hasNext()) {
|
if (!iter.hasNext()) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue