Changes to fix and test the interval-based traversals.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1095 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3b24264c2b
commit
5b1c23a7f2
|
|
@ -45,7 +45,7 @@ import java.util.List;
|
|||
* User: aaron
|
||||
* Date: Mar 26, 2009
|
||||
* Time: 2:36:16 PM
|
||||
*
|
||||
* <p/>
|
||||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public class SAMDataSource implements SimpleDataSource {
|
||||
|
|
@ -76,12 +76,9 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
/**
|
||||
* constructor, given sam files
|
||||
*
|
||||
* @param reads the list of sam files
|
||||
* @param byReads are we a by reads traversal, or a loci traversal. We could delete this field
|
||||
* if we passed in iterGen, which would be a better (although more complicated for the
|
||||
* consumers of SAMDataSources).
|
||||
* @param reads the list of sam files
|
||||
*/
|
||||
public SAMDataSource( Reads reads, boolean byReads ) throws SimpleDataSourceLoadException {
|
||||
public SAMDataSource( Reads reads ) throws SimpleDataSourceLoadException {
|
||||
this.reads = reads;
|
||||
|
||||
// check the length
|
||||
|
|
@ -93,7 +90,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||
}
|
||||
}
|
||||
iteratorPool = new SAMIteratorPool(reads, byReads);
|
||||
iteratorPool = new SAMIteratorPool(reads);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -116,19 +113,20 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @return an iterator for that region
|
||||
*/
|
||||
public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
||||
return iteratorPool.iterator( new MappedStreamSegment(location) );
|
||||
return iteratorPool.iterator(new MappedStreamSegment(location));
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* seek
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* @param shard the shard to get data for
|
||||
*
|
||||
* @return an iterator for that region
|
||||
*/
|
||||
public StingSAMIterator seek( Shard shard ) throws SimpleDataSourceLoadException {
|
||||
// setup the iterator pool if it's not setup
|
||||
boolean queryOverlapping = ( shard.getShardType() == Shard.ShardType.READ ) ? false : true;
|
||||
iteratorPool.setQueryOverlapping(queryOverlapping);
|
||||
|
||||
StingSAMIterator iterator = null;
|
||||
if (shard.getShardType() == Shard.ShardType.READ) {
|
||||
iterator = seekRead((ReadShard) shard);
|
||||
|
|
@ -186,15 +184,15 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iteratorPool.iterator(new MappedStreamSegment(lastReadPos))));
|
||||
}
|
||||
|
||||
if( intoUnmappedReads && !includeUnmappedReads )
|
||||
if (intoUnmappedReads && !includeUnmappedReads)
|
||||
shard.signalDone();
|
||||
}
|
||||
|
||||
if (intoUnmappedReads && includeUnmappedReads) {
|
||||
if( iter != null )
|
||||
if (iter != null)
|
||||
iter.close();
|
||||
iter = toUnmappedReads( shard.getSize() );
|
||||
if( !iter.hasNext() )
|
||||
iter = toUnmappedReads(shard.getSize());
|
||||
if (!iter.hasNext())
|
||||
shard.signalDone();
|
||||
}
|
||||
|
||||
|
|
@ -214,12 +212,14 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
}
|
||||
|
||||
/**
|
||||
* Retrieve unmapped reads.
|
||||
* Retrieve unmapped reads.
|
||||
*
|
||||
* @param readCount how many reads to retrieve
|
||||
*
|
||||
* @return the bounded iterator from which the requested unmapped reads can be read
|
||||
*/
|
||||
StingSAMIterator toUnmappedReads( long readCount ) {
|
||||
StingSAMIterator iter = iteratorPool.iterator( new UnmappedStreamSegment( readsTaken,readCount) );
|
||||
StingSAMIterator toUnmappedReads( long readCount ) {
|
||||
StingSAMIterator iter = iteratorPool.iterator(new UnmappedStreamSegment(readsTaken, readCount));
|
||||
readsTaken += readCount;
|
||||
return iter;
|
||||
}
|
||||
|
|
@ -342,7 +342,7 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
protected Reads reads;
|
||||
|
||||
/** Is this a by-reads traversal or a by-locus? */
|
||||
protected boolean byReads;
|
||||
protected boolean queryOverlapping;
|
||||
|
||||
/** File header for the combined file. */
|
||||
protected SAMFileHeader header;
|
||||
|
|
@ -350,9 +350,9 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
/** our log, which we want to capture anything from this class */
|
||||
protected static Logger logger = Logger.getLogger(SAMIteratorPool.class);
|
||||
|
||||
public SAMIteratorPool( Reads reads, boolean byReads ) {
|
||||
public SAMIteratorPool( Reads reads ) {
|
||||
this.reads = reads;
|
||||
this.byReads = byReads;
|
||||
this.queryOverlapping = true;
|
||||
|
||||
ReadStreamPointer streamPointer = createNewResource();
|
||||
this.header = streamPointer.getHeader();
|
||||
|
|
@ -366,8 +366,8 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
}
|
||||
|
||||
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
||||
for( ReadStreamPointer pointer: pointers ) {
|
||||
if( pointer.canAccessSegmentEfficiently( segment ) ) {
|
||||
for (ReadStreamPointer pointer : pointers) {
|
||||
if (pointer.canAccessSegmentEfficiently(segment)) {
|
||||
return pointer;
|
||||
}
|
||||
}
|
||||
|
|
@ -375,18 +375,18 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
}
|
||||
|
||||
protected ReadStreamPointer createNewResource() {
|
||||
return new ReadStreamPointer( reads );
|
||||
return new ReadStreamPointer(reads);
|
||||
}
|
||||
|
||||
protected StingSAMIterator createIteratorFromResource( DataStreamSegment segment, ReadStreamPointer streamPointer ) {
|
||||
StingSAMIterator iterator = null;
|
||||
|
||||
if( byReads )
|
||||
iterator = streamPointer.getReadsContainedBy( segment );
|
||||
if (!queryOverlapping)
|
||||
iterator = streamPointer.getReadsContainedBy(segment);
|
||||
else {
|
||||
if( !(segment instanceof MappedStreamSegment) )
|
||||
if (!( segment instanceof MappedStreamSegment ))
|
||||
throw new StingException("Segment is unmapped; true overlaps cannot be determined.");
|
||||
iterator = streamPointer.getReadsOverlapping( (MappedStreamSegment)segment );
|
||||
iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
|
||||
}
|
||||
|
||||
return new ReleasingIterator(iterator);
|
||||
|
|
@ -399,18 +399,41 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
private class ReleasingIterator implements StingSAMIterator {
|
||||
private final StingSAMIterator wrappedIterator;
|
||||
|
||||
public Reads getSourceInfo() { return wrappedIterator.getSourceInfo(); }
|
||||
public Reads getSourceInfo() {
|
||||
return wrappedIterator.getSourceInfo();
|
||||
}
|
||||
|
||||
public ReleasingIterator( StingSAMIterator wrapped ) { this.wrappedIterator = wrapped; }
|
||||
public ReleasingIterator( StingSAMIterator wrapped ) {
|
||||
this.wrappedIterator = wrapped;
|
||||
}
|
||||
|
||||
public ReleasingIterator iterator() {
|
||||
return this;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
||||
}
|
||||
|
||||
public ReleasingIterator iterator() { return this; }
|
||||
public void remove() { throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); }
|
||||
public void close() {
|
||||
wrappedIterator.close();
|
||||
release(this);
|
||||
}
|
||||
|
||||
public boolean hasNext() { return wrappedIterator.hasNext(); }
|
||||
public SAMRecord next() { return wrappedIterator.next(); }
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecord next() {
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isQueryOverlapping() {
|
||||
return queryOverlapping;
|
||||
}
|
||||
|
||||
public void setQueryOverlapping( boolean queryOverlapping ) {
|
||||
this.queryOverlapping = queryOverlapping;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,19 +104,15 @@ public abstract class MicroScheduler {
|
|||
*/
|
||||
protected MicroScheduler(Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
if (walker instanceof ReadWalker) {
|
||||
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
||||
this.reads = getReadsDataSource(reads, true);
|
||||
traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods);
|
||||
} else if (walker instanceof LocusWalker) {
|
||||
traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods);
|
||||
this.reads = getReadsDataSource(reads, false);
|
||||
} else if (walker instanceof DuplicateWalker) {
|
||||
traversalEngine = new TraverseDuplicates(reads.getReadsFiles(), refFile, rods);
|
||||
this.reads = getReadsDataSource(reads, true);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
||||
}
|
||||
|
||||
|
||||
this.reads = getReadsDataSource(reads);
|
||||
this.reference = openReferenceSequenceFile(refFile);
|
||||
this.rods = getReferenceOrderedDataSources(rods);
|
||||
}
|
||||
|
|
@ -209,16 +205,15 @@ public abstract class MicroScheduler {
|
|||
* Gets a data source for the given set of reads.
|
||||
*
|
||||
* @param reads the read source information
|
||||
* @param byReads are we a by reads traversal, or not
|
||||
*
|
||||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource getReadsDataSource(Reads reads, boolean byReads) {
|
||||
private SAMDataSource getReadsDataSource(Reads reads) {
|
||||
// By reference traversals are happy with no reads. Make sure that case is handled.
|
||||
if (reads.getReadsFiles().size() == 0)
|
||||
return null;
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(reads, byReads);
|
||||
SAMDataSource dataSource = new SAMDataSource(reads);
|
||||
|
||||
// Side effect: initialize the traversal engine with reads data.
|
||||
// TODO: Give users a dedicated way of getting the header so that the MicroScheduler
|
||||
|
|
|
|||
|
|
@ -0,0 +1,96 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.io.File;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* use this to inject into SAMDataSource for testing
|
||||
*/
|
||||
public class ArtificialResourcePool extends SAMIteratorPool {
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
// the header
|
||||
private SAMFileHeader header;
|
||||
private ArtificialSAMIterator iterator;
|
||||
|
||||
/**
|
||||
* Track the iterator to see whether it's venturing into unmapped reads for the first
|
||||
* time. If so, query straight there. Only works for query iterators.
|
||||
*
|
||||
* TODO: Clean up.
|
||||
*/
|
||||
private boolean intoUnmappedReads = false;
|
||||
|
||||
public ArtificialResourcePool( SAMFileHeader header, ArtificialSAMIterator iterator ) {
|
||||
super( new Reads(Collections.<File>emptyList()) );
|
||||
this.header = header;
|
||||
this.iterator = iterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StingSAMIterator iterator( DataStreamSegment segment ) {
|
||||
if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) {
|
||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
||||
queryIterator.queryContained(mappedSegment.locus.getContig(), (int)mappedSegment.locus.getStart(), (int)mappedSegment.locus.getStop());
|
||||
return queryIterator;
|
||||
}
|
||||
else if (segment instanceof UnmappedStreamSegment) {
|
||||
if( !intoUnmappedReads ) {
|
||||
if( iterator instanceof ArtificialSAMQueryIterator ) {
|
||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||
queryIterator.queryUnmappedReads();
|
||||
}
|
||||
intoUnmappedReads = true;
|
||||
}
|
||||
return new BoundedReadIterator(iterator,((UnmappedStreamSegment)segment).size);
|
||||
}
|
||||
else
|
||||
throw new StingException("Unsupported segment type passed to test");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the merged header
|
||||
*
|
||||
* @return the merged header
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return this.header;
|
||||
}
|
||||
}
|
||||
|
|
@ -88,7 +88,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
Reads reads = new Reads(fl);
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(reads,false);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
for (Shard sh : strat) {
|
||||
int readCount = 0;
|
||||
count++;
|
||||
|
|
@ -138,7 +138,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
int count = 0;
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(reads,false);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
for (Shard sh : strat) {
|
||||
int readCount = 0;
|
||||
count++;
|
||||
|
|
@ -175,7 +175,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
|
||||
logger.debug("Pile two:");
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(reads,false);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
for (Shard sh : strat) {
|
||||
int readCount = 0;
|
||||
count++;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,150 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class SAMByIntervalTest
|
||||
* <p/>
|
||||
* Test that the SAM data source behaves well given intervals
|
||||
*/
|
||||
public class SAMByIntervalTest extends BaseTest {
|
||||
private List<File> fl;
|
||||
ShardStrategy shardStrategy;
|
||||
Reads reads;
|
||||
private int targetReadCount = 14;
|
||||
|
||||
|
||||
// constants we use throughout the tests
|
||||
protected final int READ_COUNT;
|
||||
protected final int ENDING_CHROMO;
|
||||
protected final int STARTING_CHROMO;
|
||||
protected final int UNMAPPED_READ_COUNT;
|
||||
|
||||
public SAMByIntervalTest() {
|
||||
READ_COUNT = 100;
|
||||
ENDING_CHROMO = 10;
|
||||
STARTING_CHROMO = 1;
|
||||
UNMAPPED_READ_COUNT = 1000;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function sets up the list of test files and the Reads object before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
fl = new ArrayList<File>();
|
||||
|
||||
// sequence
|
||||
//seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly17/v0/Homo_sapiens_assembly17.fasta"));
|
||||
//GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
|
||||
// setup the test files
|
||||
fl.add(new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"));
|
||||
reads = new Reads(fl);
|
||||
}
|
||||
|
||||
|
||||
/** run a test on data over a specific interval */
|
||||
private void testRead( int start, int stop, int readCount ) {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT),
|
||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(STARTING_CHROMO, ENDING_CHROMO, READ_COUNT, UNMAPPED_READ_COUNT));
|
||||
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
int unmappedReadsSeen = 0;
|
||||
int iterations = 0;
|
||||
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
data.setResourcePool(gen);
|
||||
GenomeLocSortedSet set = new GenomeLocSortedSet();
|
||||
set.add(GenomeLocParser.createGenomeLoc(0, start, stop));
|
||||
ShardStrategy strat = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, gen.getHeader().getSequenceDictionary(), UNMAPPED_READ_COUNT, set);
|
||||
|
||||
StingSAMIterator iter = data.seek(strat.next());
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
SAMRecord r = iter.next();
|
||||
// uncomment for debugging - System.err.println(r.getAlignmentStart() + " " + r.getAlignmentEnd());
|
||||
count++;
|
||||
}
|
||||
assertEquals(readCount, count);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* test out that we get a single read, given the specific size
|
||||
*/
|
||||
@Test
|
||||
public void testSingleRead() {
|
||||
testRead(1,ArtificialSAMUtils.DEFAULT_READ_LENGTH,1);
|
||||
}
|
||||
|
||||
/**
|
||||
* test out that we get the expected amount for a whole chromosome
|
||||
*/
|
||||
@Test
|
||||
public void testChromosome() {
|
||||
testRead(1, READ_COUNT, READ_COUNT -ArtificialSAMUtils.DEFAULT_READ_LENGTH+1); // +1 because we go from 1 up to 101
|
||||
}
|
||||
|
||||
/**
|
||||
* test out that we get the expected amount for an interval in the middle of a chromosome
|
||||
*/
|
||||
@Test
|
||||
public void testMiddle() {
|
||||
testRead(20, READ_COUNT-20, READ_COUNT -ArtificialSAMUtils.DEFAULT_READ_LENGTH-40+2);
|
||||
}
|
||||
|
||||
|
||||
private SAMFileHeader createArtificialSamHeader( int startingChr, int endingChr, int readCount, int readSize ) {
|
||||
return ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1,
|
||||
startingChr,
|
||||
readCount + readSize);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,19 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||
|
||||
import static junit.framework.Assert.fail;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMIterator;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Before;
|
||||
|
|
@ -22,7 +17,6 @@ import org.junit.Test;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -78,14 +72,14 @@ public class SAMByReadsTest extends BaseTest {
|
|||
@Test
|
||||
public void testToUnmappedReads() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(createArtificialSamHeader(1,10,100,1000),
|
||||
ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000) );
|
||||
ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 100, 10, 1000) );
|
||||
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
try {
|
||||
int unmappedReadsSeen = 0;
|
||||
int iterations = 0;
|
||||
|
||||
SAMDataSource data = new SAMDataSource(reads,true);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
data.setResourcePool(gen);
|
||||
|
||||
for (int x = 0; x < 10; x++) {
|
||||
|
|
@ -121,7 +115,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
try {
|
||||
int iterations = 0;
|
||||
int readCount = 0;
|
||||
SAMDataSource data = new SAMDataSource(reads,true);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
|
||||
ArrayList<Integer> readsPerShard = new ArrayList<Integer>();
|
||||
|
||||
|
|
@ -176,7 +170,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
try {
|
||||
int iterations = 0;
|
||||
int readCount = 0;
|
||||
SAMDataSource data = new SAMDataSource(reads,true);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
|
||||
|
||||
data.setResourcePool(gen);
|
||||
|
|
@ -224,59 +218,3 @@ public class SAMByReadsTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* use this to inject into SAMDataSource for testing
|
||||
*/
|
||||
class ArtificialResourcePool extends SAMIteratorPool {
|
||||
// How strict should we be with SAM/BAM parsing?
|
||||
protected SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
// the header
|
||||
private SAMFileHeader header;
|
||||
private ArtificialSAMIterator iterator;
|
||||
|
||||
/**
|
||||
* Track the iterator to see whether it's venturing into unmapped reads for the first
|
||||
* time. If so, query straight there. Only works for query iterators.
|
||||
*
|
||||
* TODO: Clean up.
|
||||
*/
|
||||
private boolean intoUnmappedReads = false;
|
||||
|
||||
public ArtificialResourcePool( SAMFileHeader header, ArtificialSAMIterator iterator ) {
|
||||
super( new Reads(Collections.<File>emptyList()),true );
|
||||
this.header = header;
|
||||
this.iterator = iterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StingSAMIterator iterator( DataStreamSegment segment ) {
|
||||
if (segment instanceof MappedStreamSegment && iterator instanceof ArtificialSAMQueryIterator) {
|
||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||
MappedStreamSegment mappedSegment = (MappedStreamSegment)segment;
|
||||
queryIterator.queryContained(mappedSegment.locus.getContig(), (int)mappedSegment.locus.getStart(), (int)mappedSegment.locus.getStop());
|
||||
return queryIterator;
|
||||
}
|
||||
else if (segment instanceof UnmappedStreamSegment) {
|
||||
if( !intoUnmappedReads ) {
|
||||
if( iterator instanceof ArtificialSAMQueryIterator ) {
|
||||
ArtificialSAMQueryIterator queryIterator = (ArtificialSAMQueryIterator)iterator;
|
||||
queryIterator.queryUnmappedReads();
|
||||
}
|
||||
intoUnmappedReads = true;
|
||||
}
|
||||
return new BoundedReadIterator(iterator,((UnmappedStreamSegment)segment).size);
|
||||
}
|
||||
else
|
||||
throw new StingException("Unsupported segment type passed to test");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the merged header
|
||||
*
|
||||
* @return the merged header
|
||||
*/
|
||||
public SAMFileHeader getHeader() {
|
||||
return this.header;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
|||
long shardReadCount = 0;
|
||||
|
||||
try {
|
||||
SAMDataSource data = new SAMDataSource(reads,true);
|
||||
SAMDataSource data = new SAMDataSource(reads);
|
||||
|
||||
// make sure we have a shard
|
||||
if (!strat.hasNext()) {
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
|
||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||
dataSource.viewUnmappedReads(false);
|
||||
|
||||
countReadWalker.initialize();
|
||||
|
|
@ -169,7 +169,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
|
||||
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList));
|
||||
dataSource.viewUnmappedReads(true);
|
||||
|
||||
countReadWalker.initialize();
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.sam;
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
|
|
@ -21,7 +20,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void basicReadIteratorTest() {
|
||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100);
|
||||
StingSAMIterator iter = ArtificialSAMUtils.mappedReadIterator(1, 100, 100);
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
SAMRecord rec = iter.next();
|
||||
|
|
@ -32,7 +31,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void tenPerChromosome() {
|
||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10);
|
||||
StingSAMIterator iter = ArtificialSAMUtils.mappedReadIterator(1, 100, 10);
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
SAMRecord rec = iter.next();
|
||||
|
|
@ -45,7 +44,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void onePerChromosome() {
|
||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 1);
|
||||
StingSAMIterator iter = ArtificialSAMUtils.mappedReadIterator(1, 100, 1);
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
SAMRecord rec = iter.next();
|
||||
|
|
@ -58,7 +57,7 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void basicUnmappedIteratorTest() {
|
||||
StingSAMIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100, 1000);
|
||||
StingSAMIterator iter = ArtificialSAMUtils.mappedAndUnmappedReadIterator(1, 100, 100, 1000);
|
||||
int count = 0;
|
||||
for (int x = 0; x < (100* 100); x++ ) {
|
||||
if (!iter.hasNext()) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue