added changes to support alec toUnmappedRead seek. Huge improvements (orders of magnitude) in unmapped read performance.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1021 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4f6d26849f
commit
6ee64c7e43
|
|
@ -1,7 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
|
|
||||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||||
import net.sf.picard.util.PeekableIterator;
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
|
@ -21,24 +20,43 @@ import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* User: aaron
|
* User: aaron
|
||||||
* Date: Mar 26, 2009
|
* Date: Mar 26, 2009
|
||||||
* Time: 2:36:16 PM
|
* Time: 2:36:16 PM
|
||||||
* <p/>
|
*
|
||||||
* The Broad Institute
|
* Converts shards to SAM iterators over the specified region
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
|
||||||
* This software and its documentation are copyright 2009 by the
|
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
||||||
* <p/>
|
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
||||||
*/
|
*/
|
||||||
public class SAMDataSource implements SimpleDataSource {
|
public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
|
|
||||||
/** Backing support for reads. */
|
/** Backing support for reads. */
|
||||||
private Reads reads = null;
|
private final Reads reads;
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||||
|
|
@ -62,7 +80,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
/**
|
/**
|
||||||
* constructor, given sam files
|
* constructor, given sam files
|
||||||
*
|
*
|
||||||
* @param reads the list of sam files
|
* @param reads the list of sam files
|
||||||
* @param byReads are we a by reads traversal, or a loci traversal. We could delete this field
|
* @param byReads are we a by reads traversal, or a loci traversal. We could delete this field
|
||||||
* if we passed in iterGen, which would be a better (although more complicated for the
|
* if we passed in iterGen, which would be a better (although more complicated for the
|
||||||
* consumers of SAMDataSources).
|
* consumers of SAMDataSources).
|
||||||
|
|
@ -79,11 +97,12 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
iteratorPool = new SAMIteratorPool(reads,byReads);
|
iteratorPool = new SAMIteratorPool(reads, byReads);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For unit testing, add a custom iterator pool.
|
* For unit testing, add a custom iterator pool.
|
||||||
|
*
|
||||||
* @param iteratorPool Custom mock iterator pool.
|
* @param iteratorPool Custom mock iterator pool.
|
||||||
*/
|
*/
|
||||||
void setResourcePool( SAMIteratorPool iteratorPool ) {
|
void setResourcePool( SAMIteratorPool iteratorPool ) {
|
||||||
|
|
@ -100,7 +119,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
*
|
*
|
||||||
* @return an iterator for that region
|
* @return an iterator for that region
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {
|
public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException {
|
||||||
return iteratorPool.iterator(location);
|
return iteratorPool.iterator(location);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -180,7 +199,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
iter.close();
|
iter.close();
|
||||||
}
|
}
|
||||||
iter = iteratorPool.iterator(null);
|
iter = iteratorPool.iterator(null);
|
||||||
bound = toUnmappedReads(shard.getSize(), iter);
|
bound = toUnmappedReads(shard.getSize(), (QueryIterator) iter);
|
||||||
}
|
}
|
||||||
if (bound == null) {
|
if (bound == null) {
|
||||||
shard.signalDone();
|
shard.signalDone();
|
||||||
|
|
@ -211,41 +230,24 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return the bounded iterator that you can use to get the intervaled reads from
|
* @return the bounded iterator that you can use to get the intervaled reads from
|
||||||
* @throws SimpleDataSourceLoadException
|
* @throws SimpleDataSourceLoadException
|
||||||
*/
|
*/
|
||||||
BoundedReadIterator toUnmappedReads( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException {
|
BoundedReadIterator toUnmappedReads( long readCount, QueryIterator iter ) throws SimpleDataSourceLoadException {
|
||||||
PeekableIterator<SAMRecord> peekable = new PeekableIterator<SAMRecord>(iter);
|
iter.queryUnmappedReads();
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
int cnt = 0;
|
|
||||||
SAMRecord d = null;
|
|
||||||
while (peekable.hasNext()) {
|
|
||||||
d = peekable.peek();
|
|
||||||
int x = d.getReferenceIndex();
|
|
||||||
if (x < 0)
|
|
||||||
// we have the magic read that starts the unmapped read segment!
|
|
||||||
break;
|
|
||||||
cnt++;
|
|
||||||
peekable.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
// check to see what happened, did we run out of reads?
|
|
||||||
if (!peekable.hasNext()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// now walk until we've taken the unmapped read count
|
// now walk until we've taken the unmapped read count
|
||||||
while (peekable.hasNext() && count < this.readsTaken) {
|
while (iter.hasNext() && count < this.readsTaken) {
|
||||||
peekable.next();
|
iter.next();
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check to see what happened, did we run out of reads?
|
// check to see what happened, did we run out of reads?
|
||||||
if (!peekable.hasNext()) {
|
if (!iter.hasNext()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// we're not out of unmapped reads, so increment our read cout
|
// we're not out of unmapped reads, so increment our read cout
|
||||||
this.readsTaken += readCount;
|
this.readsTaken += readCount;
|
||||||
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, peekable), readCount);
|
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -356,20 +358,14 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class SAMIteratorPool extends ResourcePool<SamFileHeaderMerger,StingSAMIterator> {
|
class SAMIteratorPool extends ResourcePool<SamFileHeaderMerger, QueryIterator> {
|
||||||
/**
|
/** Source information about the reads. */
|
||||||
* Source information about the reads.
|
|
||||||
*/
|
|
||||||
protected Reads reads;
|
protected Reads reads;
|
||||||
|
|
||||||
/**
|
/** Is this a by-reads traversal or a by-locus? */
|
||||||
* Is this a by-reads traversal or a by-locus?
|
|
||||||
*/
|
|
||||||
protected boolean byReads;
|
protected boolean byReads;
|
||||||
|
|
||||||
/**
|
/** File header for the combined file. */
|
||||||
* File header for the combined file.
|
|
||||||
*/
|
|
||||||
protected SAMFileHeader header;
|
protected SAMFileHeader header;
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
|
|
@ -379,54 +375,87 @@ class SAMIteratorPool extends ResourcePool<SamFileHeaderMerger,StingSAMIterator>
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
this.byReads = byReads;
|
this.byReads = byReads;
|
||||||
|
|
||||||
SamFileHeaderMerger merger = createNewResource( null );
|
SamFileHeaderMerger merger = createNewResource(null);
|
||||||
this.header = merger.getMergedHeader();
|
this.header = merger.getMergedHeader();
|
||||||
// Add this resource to the pool.
|
// Add this resource to the pool.
|
||||||
this.addNewResource( merger );
|
this.addNewResource(merger);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Get the combined header for all files in the iterator pool. */
|
||||||
* Get the combined header for all files in the iterator pool.
|
|
||||||
*/
|
|
||||||
public SAMFileHeader getHeader() {
|
public SAMFileHeader getHeader() {
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List<SamFileHeaderMerger> mergers) {
|
protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List<SamFileHeaderMerger> mergers ) {
|
||||||
if( mergers.size() == 0 )
|
if (mergers.size() == 0)
|
||||||
return null;
|
return null;
|
||||||
return mergers.get(0);
|
return mergers.get(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SamFileHeaderMerger createNewResource( GenomeLoc position ) {
|
protected SamFileHeaderMerger createNewResource( GenomeLoc position ) {
|
||||||
return createHeaderMerger( reads, SAMFileHeader.SortOrder.coordinate );
|
return createHeaderMerger(reads, SAMFileHeader.SortOrder.coordinate);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StingSAMIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) {
|
protected QueryIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) {
|
||||||
final MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2(headerMerger, reads);
|
final MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2(headerMerger, reads);
|
||||||
|
|
||||||
if( loc != null ) {
|
if (loc != null) {
|
||||||
if (byReads)
|
if (byReads)
|
||||||
iterator.queryContained(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
|
iterator.queryContained(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
|
||||||
else
|
else
|
||||||
iterator.queryOverlapping(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
|
iterator.queryOverlapping(loc.getContig(), (int) loc.getStart(), (int) loc.getStop());
|
||||||
}
|
}
|
||||||
|
|
||||||
return new StingSAMIterator() {
|
return new QueryIterator() {
|
||||||
public Reads getSourceInfo() { return reads; }
|
public Reads getSourceInfo() {
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
iterator.close();
|
iterator.close();
|
||||||
release(this);
|
release(this);
|
||||||
}
|
}
|
||||||
public Iterator<SAMRecord> iterator() { return this; }
|
|
||||||
public boolean hasNext() { return iterator.hasNext(); }
|
public Iterator<SAMRecord> iterator() {
|
||||||
public SAMRecord next() { return iterator.next(); }
|
return this;
|
||||||
public void remove() { throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); }
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
return iterator.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord next() {
|
||||||
|
return iterator.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Can't remove from a StingSAMIterator");
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord peek() {
|
||||||
|
return iterator.peek();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void queryOverlapping( String contig, int start, int stop ) {
|
||||||
|
iterator.queryOverlapping(contig, start, stop);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void query( String contig, int start, int stop, boolean contained ) {
|
||||||
|
iterator.query(contig, start, stop, contained);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void queryUnmappedReads() {
|
||||||
|
iterator.queryUnmappedReads();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void queryContained( String contig, int start, int stop ) {
|
||||||
|
iterator.queryContained(contig, start, stop);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void closeResource( SamFileHeaderMerger resource ) {
|
protected void closeResource( SamFileHeaderMerger resource ) {
|
||||||
for( SAMFileReader reader: resource.getReaders() )
|
for (SAMFileReader reader : resource.getReaders())
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import net.sf.samtools.util.CloseableIterator;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
@ -46,7 +47,7 @@ import java.util.PriorityQueue;
|
||||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||||
* the requested output format is unsorted, in which case any combination is valid.
|
* the requested output format is unsorted, in which case any combination is valid.
|
||||||
*/
|
*/
|
||||||
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord>, PeekingStingIterator {
|
public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>, Iterable<SAMRecord>, QueryIterator {
|
||||||
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
protected PriorityQueue<ComparableSamRecordIterator> pq = null;
|
||||||
protected final SamFileHeaderMerger samHeaderMerger;
|
protected final SamFileHeaderMerger samHeaderMerger;
|
||||||
protected final SAMFileHeader.SortOrder sortOrder;
|
protected final SAMFileHeader.SortOrder sortOrder;
|
||||||
|
|
@ -54,6 +55,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
private SAMRecord mNextRecord;
|
private SAMRecord mNextRecord;
|
||||||
protected boolean initialized = false;
|
protected boolean initialized = false;
|
||||||
protected final Reads reads;
|
protected final Reads reads;
|
||||||
|
protected boolean warnedUserAboutSortOrder = false; // so we only warn the user once
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||||
|
|
@ -62,7 +64,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
* @param headerMerger the header to merge
|
* @param headerMerger the header to merge
|
||||||
* @param reads the reads pile
|
* @param reads the reads pile
|
||||||
*/
|
*/
|
||||||
public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) {
|
public MergingSamRecordIterator2( final SamFileHeaderMerger headerMerger, Reads reads ) {
|
||||||
this.samHeaderMerger = headerMerger;
|
this.samHeaderMerger = headerMerger;
|
||||||
this.reads = reads;
|
this.reads = reads;
|
||||||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||||
|
|
@ -90,11 +92,24 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkSortOrder(SAMFileReader reader) {
|
/**
|
||||||
|
* verify the sort order
|
||||||
|
*
|
||||||
|
* @param reader the reader to check
|
||||||
|
*/
|
||||||
|
private void checkSortOrder( SAMFileReader reader ) {
|
||||||
if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) {
|
if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) {
|
||||||
throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() +
|
if (reads.getSafetyChecking()) {
|
||||||
" vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals "
|
throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() +
|
||||||
+ reader.getFileHeader().getAttribute("SO") + " in this case).");
|
" vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals "
|
||||||
|
+ reader.getFileHeader().getAttribute("SO") + " in this case).");
|
||||||
|
} else if(!warnedUserAboutSortOrder) {
|
||||||
|
warnedUserAboutSortOrder = true;
|
||||||
|
Utils.warnUser("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() +
|
||||||
|
" vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals "
|
||||||
|
+ reader.getFileHeader().getAttribute("SO") + " in this case).");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -102,7 +117,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void queryOverlapping(final String contig, final int start, final int stop) {
|
public void queryOverlapping( final String contig, final int start, final int stop ) {
|
||||||
if (initialized) {
|
if (initialized) {
|
||||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||||
}
|
}
|
||||||
|
|
@ -118,7 +133,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void query(final String contig, final int start, final int stop, final boolean contained) {
|
public void query( final String contig, final int start, final int stop, final boolean contained ) {
|
||||||
if (initialized) {
|
if (initialized) {
|
||||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||||
}
|
}
|
||||||
|
|
@ -133,7 +148,22 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void queryContained(final String contig, final int start, final int stop) {
|
public void queryUnmappedReads() {
|
||||||
|
if (initialized) {
|
||||||
|
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||||
|
}
|
||||||
|
final SAMRecordComparator comparator = getComparator();
|
||||||
|
for (final SAMFileReader reader : samHeaderMerger.getReaders()) {
|
||||||
|
Iterator<SAMRecord> recordIter = reader.queryUnmapped();
|
||||||
|
final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator);
|
||||||
|
addIfNotEmpty(iterator);
|
||||||
|
}
|
||||||
|
setInitialized();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void queryContained( final String contig, final int start, final int stop ) {
|
||||||
if (initialized) {
|
if (initialized) {
|
||||||
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2");
|
||||||
}
|
}
|
||||||
|
|
@ -237,7 +267,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
*
|
*
|
||||||
* @param iterator the iterator to add
|
* @param iterator the iterator to add
|
||||||
*/
|
*/
|
||||||
protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
|
protected void addIfNotEmpty( final ComparableSamRecordIterator iterator ) {
|
||||||
//System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart());
|
//System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart());
|
||||||
if (iterator.hasNext()) {
|
if (iterator.hasNext()) {
|
||||||
pq.offer(iterator);
|
pq.offer(iterator);
|
||||||
|
|
@ -262,11 +292,11 @@ public class MergingSamRecordIterator2 implements CloseableIterator<SAMRecord>,
|
||||||
// For unsorted build a fake comparator that compares based on object ID
|
// For unsorted build a fake comparator that compares based on object ID
|
||||||
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
||||||
return new SAMRecordComparator() {
|
return new SAMRecordComparator() {
|
||||||
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
|
public int fileOrderCompare( final SAMRecord lhs, final SAMRecord rhs ) {
|
||||||
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int compare(final SAMRecord lhs, final SAMRecord rhs) {
|
public int compare( final SAMRecord lhs, final SAMRecord rhs ) {
|
||||||
return fileOrderCompare(lhs, rhs);
|
return fileOrderCompare(lhs, rhs);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -345,13 +375,13 @@ class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements
|
||||||
* @param sam the SAM file to read records from
|
* @param sam the SAM file to read records from
|
||||||
* @param comparator the Comparator to use to provide ordering fo SAMRecords
|
* @param comparator the Comparator to use to provide ordering fo SAMRecords
|
||||||
*/
|
*/
|
||||||
public ComparableSamRecordIterator(final SAMFileReader sam, final Comparator<SAMRecord> comparator) {
|
public ComparableSamRecordIterator( final SAMFileReader sam, final Comparator<SAMRecord> comparator ) {
|
||||||
super(sam.iterator());
|
super(sam.iterator());
|
||||||
this.reader = sam;
|
this.reader = sam;
|
||||||
this.comparator = comparator;
|
this.comparator = comparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ComparableSamRecordIterator(final SAMFileReader sam, Iterator<SAMRecord> iterator, final Comparator<SAMRecord> comparator) {
|
public ComparableSamRecordIterator( final SAMFileReader sam, Iterator<SAMRecord> iterator, final Comparator<SAMRecord> comparator ) {
|
||||||
super(iterator); // use the provided iterator
|
super(iterator); // use the provided iterator
|
||||||
this.reader = sam;
|
this.reader = sam;
|
||||||
this.comparator = comparator;
|
this.comparator = comparator;
|
||||||
|
|
@ -381,7 +411,7 @@ class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements
|
||||||
*
|
*
|
||||||
* @return a negative, 0 or positive number as described in the Comparator interface
|
* @return a negative, 0 or positive number as described in the Comparator interface
|
||||||
*/
|
*/
|
||||||
public int compareTo(final ComparableSamRecordIterator that) {
|
public int compareTo( final ComparableSamRecordIterator that ) {
|
||||||
if (this.comparator.getClass() != that.comparator.getClass()) {
|
if (this.comparator.getClass() != that.comparator.getClass()) {
|
||||||
throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
|
throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " +
|
||||||
"have different orderings internally");
|
"have different orderings internally");
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -30,14 +29,12 @@ import net.sf.samtools.SAMRecord;
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* <p/>
|
* <p/>
|
||||||
* Interface Peekable
|
* Class PeekingIterator
|
||||||
* <p/>
|
* <p/>
|
||||||
* This interface indicates that
|
* a peekable interface, that requires a peek() method
|
||||||
*/
|
*/
|
||||||
public interface PeekingStingIterator extends StingSAMIterator {
|
public interface PeekingIterator<T> {
|
||||||
/**
|
|
||||||
* peek, given the specified type
|
/** @return returns a peeked value */
|
||||||
* @return
|
public T peek();
|
||||||
*/
|
|
||||||
SAMRecord peek();
|
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMRecordComparator;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
*
|
||||||
|
* This interface indicates that the iterator is query able
|
||||||
|
*/
|
||||||
|
public interface QueryIterator extends StingSAMIterator, PeekingIterator<SAMRecord> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The required methods to query able
|
||||||
|
**/
|
||||||
|
public void queryOverlapping(final String contig, final int start, final int stop);
|
||||||
|
public void query(final String contig, final int start, final int stop, final boolean contained);
|
||||||
|
public void queryUnmappedReads();
|
||||||
|
public void queryContained(final String contig, final int start, final int stop);
|
||||||
|
}
|
||||||
|
|
@ -50,13 +50,13 @@ import java.math.BigInteger;
|
||||||
*
|
*
|
||||||
* A descriptions should go here. Blame aaron if it's missing.
|
* A descriptions should go here. Blame aaron if it's missing.
|
||||||
*/
|
*/
|
||||||
public class ReadValidationWalker extends ReadWalker<SAMRecord, Integer> {
|
public class ReadValidationWalker extends ReadWalker<SAMRecord, SAMRecord> {
|
||||||
|
|
||||||
// our MD5 sum
|
// our MD5 sum
|
||||||
private MessageDigest m;
|
private MessageDigest m;
|
||||||
|
|
||||||
// private list of md5sums
|
// private list of md5sums
|
||||||
private final List<BigInteger> list = new ArrayList<BigInteger>();
|
private final List<String> list = new ArrayList<String>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The initialize function.
|
* The initialize function.
|
||||||
|
|
@ -94,8 +94,8 @@ public class ReadValidationWalker extends ReadWalker<SAMRecord, Integer> {
|
||||||
* bam file, if it was specified on the command line
|
* bam file, if it was specified on the command line
|
||||||
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
||||||
*/
|
*/
|
||||||
public Integer reduceInit() {
|
public SAMRecord reduceInit() {
|
||||||
return new Integer(0);
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -104,31 +104,16 @@ public class ReadValidationWalker extends ReadWalker<SAMRecord, Integer> {
|
||||||
* @param output the output source
|
* @param output the output source
|
||||||
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
||||||
*/
|
*/
|
||||||
public Integer reduce( SAMRecord read, Integer output ) {
|
public SAMRecord reduce( SAMRecord read, SAMRecord output ) {
|
||||||
byte[] ray = new byte[read.getReadName().length() + 5];
|
if (output == null)
|
||||||
int x = 0;
|
return read;
|
||||||
for (char c: read.getReadName().toCharArray()) {
|
if ((read.getReferenceIndex() == output.getReferenceIndex()) && (read.getAlignmentStart() < output.getAlignmentStart())) {
|
||||||
ray[x] = (byte)c;
|
System.err.println("saw the read " + read.getReadName() + " duplicated, old alignment = " + output.getAlignmentStart());
|
||||||
//System.err.println("adding " + c + " to pos " + x);
|
|
||||||
x++;
|
|
||||||
}
|
}
|
||||||
//System.err.println(read.getReadName() + " name, alignment = " + read.getAlignmentStart());
|
else if (read.getReferenceIndex() != output.getReferenceIndex()){
|
||||||
int y = 0;
|
System.err.println("Switching Chromo");
|
||||||
for (;y < 4; y++) {
|
|
||||||
ray[x+y] = (byte)((read.getAlignmentStart() >> y * 8) & 0x000f);
|
|
||||||
}
|
}
|
||||||
ray[x+y] = read.getSecondOfPairFlag() ? (byte)1 : (byte)0;
|
return read;
|
||||||
BigInteger bigInt = new BigInteger(m.digest(ray));
|
|
||||||
//System.err.println(bigInt.toString());
|
|
||||||
m.reset();
|
|
||||||
if (this.list.contains(bigInt)) {
|
|
||||||
throw new StingException("Seen Read: " + bigInt + "-> " + read.getReadName() + " before (list size = " + list.size() + ")");
|
|
||||||
}
|
|
||||||
if (read.getAlignmentStart() < output) {
|
|
||||||
throw new StingException("Seen Read " + read.getReadName() + " has alignment of " + read.getAlignmentStart() + " before (list size = " + output + ")");
|
|
||||||
}
|
|
||||||
list.add(bigInt);
|
|
||||||
return read.getAlignmentStart();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,12 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
|
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import net.sf.picard.util.PeekableIterator;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
@ -34,7 +37,7 @@ import java.util.Iterator;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** this fake iterator allows us to look at how specific piles of reads are handled */
|
/** this fake iterator allows us to look at how specific piles of reads are handled */
|
||||||
public class ArtificialSAMIterator implements PeekingStingIterator {
|
public class ArtificialSAMIterator implements StingSAMIterator {
|
||||||
|
|
||||||
|
|
||||||
protected int currentChromo = 0;
|
protected int currentChromo = 0;
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -41,7 +42,7 @@ import org.broadinstitute.sting.utils.StingException;
|
||||||
* to test out classes that use specific itervals. The reads returned will
|
* to test out classes that use specific itervals. The reads returned will
|
||||||
* all lie in order in the specified interval.
|
* all lie in order in the specified interval.
|
||||||
*/
|
*/
|
||||||
public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
|
public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements QueryIterator {
|
||||||
|
|
||||||
// get the next positon
|
// get the next positon
|
||||||
protected int finalPos = 0;
|
protected int finalPos = 0;
|
||||||
|
|
@ -88,6 +89,39 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
|
||||||
initialize(contig, start, stop);
|
initialize(contig, start, stop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void query( String contig, int start, int stop, boolean contained ) {
|
||||||
|
if (contained)
|
||||||
|
queryContained(contig, start, stop);
|
||||||
|
else
|
||||||
|
queryOverlapping(contig, start, stop);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void queryUnmappedReads() {
|
||||||
|
initializeUnmapped();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* initialize the iterator to an unmapped read position
|
||||||
|
*/
|
||||||
|
public void initializeUnmapped() {
|
||||||
|
ensureUntouched();
|
||||||
|
while (super.hasNext() && this.peek().getReferenceIndex() >= 0) {
|
||||||
|
super.next();
|
||||||
|
}
|
||||||
|
// sanity check that we have an actual matching read next
|
||||||
|
SAMRecord rec = this.peek();
|
||||||
|
if (rec == null) {
|
||||||
|
throw new StingException("The next read doesn't match");
|
||||||
|
}
|
||||||
|
// set the seeked variable to true
|
||||||
|
seeked = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* initialize the query iterator
|
* initialize the query iterator
|
||||||
|
|
@ -141,6 +175,11 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator {
|
||||||
if (rec.getReferenceIndex() != this.contigIndex) {
|
if (rec.getReferenceIndex() != this.contigIndex) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// if we have an unmapped read, matching the contig is good enough for us
|
||||||
|
if (rec.getReferenceIndex() < 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
if (!overlapping) {
|
if (!overlapping) {
|
||||||
// if the start or the end are somewhere within our range
|
// if the start or the end are somewhere within our range
|
||||||
if (( rec.getAlignmentStart() >= startPos && rec.getAlignmentEnd() <= finalPos )) {
|
if (( rec.getAlignmentStart() >= startPos && rec.getAlignmentEnd() <= finalPos )) {
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,7 @@ import net.sf.samtools.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
|
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -103,6 +102,51 @@ public class ArtificialSAMUtils {
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* setup a default read group for a SAMFileHeader
|
||||||
|
*
|
||||||
|
* @param header the header to set
|
||||||
|
* @param readGroupID the read group ID tag
|
||||||
|
* @param sampleName the sample name
|
||||||
|
*
|
||||||
|
* @return the adjusted SAMFileHeader
|
||||||
|
*/
|
||||||
|
public static SAMFileHeader createDefaultReadGroup( SAMFileHeader header, String readGroupID, String sampleName ) {
|
||||||
|
SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupID);
|
||||||
|
rec.setSample(sampleName);
|
||||||
|
List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
|
||||||
|
readGroups.add(rec);
|
||||||
|
header.setReadGroups(readGroups);
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* setup read groups for the specified read groups and sample names
|
||||||
|
*
|
||||||
|
* @param header the header to set
|
||||||
|
* @param readGroupIDs the read group ID tags
|
||||||
|
* @param sampleNames the sample names
|
||||||
|
*
|
||||||
|
* @return the adjusted SAMFileHeader
|
||||||
|
*/
|
||||||
|
public static SAMFileHeader createEnumeratedReadGroups( SAMFileHeader header, List<String> readGroupIDs, List<String> sampleNames ) {
|
||||||
|
if (readGroupIDs.size() != sampleNames.size()) {
|
||||||
|
throw new StingException("read group count and sample name count must be the same");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
|
||||||
|
|
||||||
|
int x = 0;
|
||||||
|
for (; x < readGroupIDs.size(); x++) {
|
||||||
|
SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupIDs.get(x));
|
||||||
|
rec.setSample(sampleNames.get(x));
|
||||||
|
readGroups.add(rec);
|
||||||
|
}
|
||||||
|
header.setReadGroups(readGroups);
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
|
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
|
||||||
*
|
*
|
||||||
|
|
@ -115,7 +159,7 @@ public class ArtificialSAMUtils {
|
||||||
* @return the artificial read
|
* @return the artificial read
|
||||||
*/
|
*/
|
||||||
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
|
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
|
||||||
if( alignmentStart == 0 )
|
if (alignmentStart == 0)
|
||||||
throw new StingException("Invalid alignment start for artificial read");
|
throw new StingException("Invalid alignment start for artificial read");
|
||||||
SAMRecord record = new SAMRecord(header);
|
SAMRecord record = new SAMRecord(header);
|
||||||
record.setReadName(name);
|
record.setReadName(name);
|
||||||
|
|
@ -128,20 +172,26 @@ public class ArtificialSAMUtils {
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create an iterator containing the specified read piles
|
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
|
||||||
*
|
*
|
||||||
* @param startingChr the chromosome (reference ID) to start from
|
* @param header the SAM header to associate the read with
|
||||||
* @param endingChr the id to end with
|
* @param name the name of the read
|
||||||
* @param readCount the number of reads per chromosome
|
* @param refIndex the reference index, i.e. what chromosome to associate it with
|
||||||
|
* @param alignmentStart where to start the alignment
|
||||||
|
* @param bases the sequence of the read
|
||||||
|
* @param qual the qualities of the read
|
||||||
*
|
*
|
||||||
* @return StingSAMIterator representing the specified amount of fake data
|
* @return the artificial read
|
||||||
*/
|
*/
|
||||||
public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) {
|
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
|
||||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
if (bases.length != qual.length) {
|
||||||
|
throw new StingException("Passed in read string is different length then the quality array");
|
||||||
return new ArtificialSAMIterator(startingChr, endingChr, readCount, header);
|
}
|
||||||
|
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
|
||||||
|
rec.setReadBases(bases);
|
||||||
|
rec.setBaseQualities(bases);
|
||||||
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -150,14 +200,29 @@ public class ArtificialSAMUtils {
|
||||||
* @param startingChr the chromosome (reference ID) to start from
|
* @param startingChr the chromosome (reference ID) to start from
|
||||||
* @param endingChr the id to end with
|
* @param endingChr the id to end with
|
||||||
* @param readCount the number of reads per chromosome
|
* @param readCount the number of reads per chromosome
|
||||||
|
*
|
||||||
|
* @return StingSAMIterator representing the specified amount of fake data
|
||||||
|
*/
|
||||||
|
public static QueryIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) {
|
||||||
|
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||||
|
|
||||||
|
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create an iterator containing the specified read piles
|
||||||
|
*
|
||||||
|
* @param startingChr the chromosome (reference ID) to start from
|
||||||
|
* @param endingChr the id to end with
|
||||||
|
* @param readCount the number of reads per chromosome
|
||||||
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
|
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
|
||||||
*
|
*
|
||||||
* @return StingSAMIterator representing the specified amount of fake data
|
* @return StingSAMIterator representing the specified amount of fake data
|
||||||
*/
|
*/
|
||||||
public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
public static QueryIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) {
|
||||||
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||||
|
|
||||||
return new ArtificialSAMIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -178,9 +243,9 @@ public class ArtificialSAMUtils {
|
||||||
/**
|
/**
|
||||||
* create an ArtificialSAMQueryIterator containing the specified read piles
|
* create an ArtificialSAMQueryIterator containing the specified read piles
|
||||||
*
|
*
|
||||||
* @param startingChr the chromosome (reference ID) to start from
|
* @param startingChr the chromosome (reference ID) to start from
|
||||||
* @param endingChr the id to end with
|
* @param endingChr the id to end with
|
||||||
* @param readCount the number of reads per chromosome
|
* @param readCount the number of reads per chromosome
|
||||||
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
|
* @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file
|
||||||
*
|
*
|
||||||
* @return StingSAMIterator representing the specified amount of fake data
|
* @return StingSAMIterator representing the specified amount of fake data
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources;
|
||||||
import static junit.framework.Assert.fail;
|
import static junit.framework.Assert.fail;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||||
|
|
@ -84,7 +83,7 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
SAMDataSource data = new SAMDataSource(reads,true);
|
SAMDataSource data = new SAMDataSource(reads,true);
|
||||||
for (int x = 0; x < 10; x++) {
|
for (int x = 0; x < 10; x++) {
|
||||||
++iterations;
|
++iterations;
|
||||||
PeekingStingIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000);
|
QueryIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000);
|
||||||
BoundedReadIterator ret = data.toUnmappedReads(100, iter);
|
BoundedReadIterator ret = data.toUnmappedReads(100, iter);
|
||||||
// count the reads we've gotten back
|
// count the reads we've gotten back
|
||||||
if (ret == null) {
|
if (ret == null) {
|
||||||
|
|
@ -173,7 +172,7 @@ public class SAMByReadsTest extends BaseTest {
|
||||||
while (shardStrategy.hasNext()) {
|
while (shardStrategy.hasNext()) {
|
||||||
|
|
||||||
|
|
||||||
BoundedReadIterator ret = (BoundedReadIterator)data.seek(shardStrategy.next());
|
StingSAMIterator ret = data.seek(shardStrategy.next());
|
||||||
assertTrue(ret != null);
|
assertTrue(ret != null);
|
||||||
while (ret.hasNext()) {
|
while (ret.hasNext()) {
|
||||||
ret.next();
|
ret.next();
|
||||||
|
|
@ -227,7 +226,7 @@ class ArtificialResourcePool extends SAMIteratorPool {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StingSAMIterator iterator( GenomeLoc loc ) {
|
public QueryIterator iterator( GenomeLoc loc ) {
|
||||||
ArtificialSAMQueryIterator iter = ArtificialSAMUtils.queryReadIterator(1, 10, 100, 1000);
|
ArtificialSAMQueryIterator iter = ArtificialSAMUtils.queryReadIterator(1, 10, 100, 1000);
|
||||||
if (loc != null) {
|
if (loc != null) {
|
||||||
iter.queryContained(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
iter.queryContained(loc.getContig(), (int)loc.getStart(), (int)loc.getStop());
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator;
|
import org.broadinstitute.sting.gatk.iterators.QueryIterator;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
@ -80,30 +80,5 @@ public class ArtificialSAMUtilsTest extends BaseTest {
|
||||||
assertEquals(1000, count);
|
assertEquals(1000, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testPeeking() {
|
|
||||||
PeekingStingIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100);
|
|
||||||
int count = 0;
|
|
||||||
while (iter.hasNext()) {
|
|
||||||
int readCnt = ((ArtificialSAMIterator)(iter)).readsTaken();
|
|
||||||
|
|
||||||
// peek the record
|
|
||||||
SAMRecord rec = iter.peek();
|
|
||||||
assertTrue(rec.getReferenceIndex() >= 0);
|
|
||||||
|
|
||||||
// next the record
|
|
||||||
SAMRecord rec2 = iter.next();
|
|
||||||
assertTrue(rec2.getReadName() == rec.getReadName());
|
|
||||||
assertTrue(rec2.getAlignmentStart() == rec.getAlignmentStart());
|
|
||||||
|
|
||||||
// find out how many reads we've taken now
|
|
||||||
int readCnt2 = ((ArtificialSAMIterator)(iter)).readsTaken();
|
|
||||||
|
|
||||||
count++;
|
|
||||||
if (count < 100*100) assertEquals(readCnt + 1, readCnt2);
|
|
||||||
else assertEquals(readCnt, readCnt2);
|
|
||||||
}
|
|
||||||
assertEquals(100 * 100, count );
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue