diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index b3085a523..bb278ba54 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import net.sf.picard.sam.SamFileHeaderMerger; -import net.sf.picard.util.PeekableIterator; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMReadGroupRecord; @@ -21,24 +20,43 @@ import java.util.ArrayList; import java.util.List; import java.util.Iterator; +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + /** * User: aaron * Date: Mar 26, 2009 * Time: 2:36:16 PM - *

- * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - *

- * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + * Converts shards to SAM iterators over the specified region */ public class SAMDataSource implements SimpleDataSource { /** Backing support for reads. */ - private Reads reads = null; + private final Reads reads; /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(SAMDataSource.class); @@ -62,7 +80,7 @@ public class SAMDataSource implements SimpleDataSource { /** * constructor, given sam files * - * @param reads the list of sam files + * @param reads the list of sam files * @param byReads are we a by reads traversal, or a loci traversal. We could delete this field * if we passed in iterGen, which would be a better (although more complicated for the * consumers of SAMDataSources). @@ -79,11 +97,12 @@ public class SAMDataSource implements SimpleDataSource { throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + smFile.getName()); } } - iteratorPool = new SAMIteratorPool(reads,byReads); + iteratorPool = new SAMIteratorPool(reads, byReads); } /** * For unit testing, add a custom iterator pool. + * * @param iteratorPool Custom mock iterator pool. */ void setResourcePool( SAMIteratorPool iteratorPool ) { @@ -100,7 +119,7 @@ public class SAMDataSource implements SimpleDataSource { * * @return an iterator for that region */ - public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException { + public StingSAMIterator seekLocus( GenomeLoc location ) throws SimpleDataSourceLoadException { return iteratorPool.iterator(location); } @@ -180,7 +199,7 @@ public class SAMDataSource implements SimpleDataSource { iter.close(); } iter = iteratorPool.iterator(null); - bound = toUnmappedReads(shard.getSize(), iter); + bound = toUnmappedReads(shard.getSize(), (QueryIterator) iter); } if (bound == null) { shard.signalDone(); @@ -211,41 +230,24 @@ public class SAMDataSource implements SimpleDataSource { * @return the bounded iterator that you can use to get the intervaled reads from * @throws SimpleDataSourceLoadException */ - BoundedReadIterator toUnmappedReads( long readCount, StingSAMIterator iter ) throws SimpleDataSourceLoadException { - PeekableIterator peekable = new PeekableIterator(iter); + BoundedReadIterator toUnmappedReads( long readCount, QueryIterator iter ) throws SimpleDataSourceLoadException { + iter.queryUnmappedReads(); int count = 0; - int cnt = 0; - SAMRecord d = null; - while (peekable.hasNext()) { - d = peekable.peek(); - int x = d.getReferenceIndex(); - if (x < 0) - // we have the magic read that starts the unmapped read segment! - break; - cnt++; - peekable.next(); - } - - // check to see what happened, did we run out of reads? - if (!peekable.hasNext()) { - return null; - } - // now walk until we've taken the unmapped read count - while (peekable.hasNext() && count < this.readsTaken) { - peekable.next(); + while (iter.hasNext() && count < this.readsTaken) { + iter.next(); count++; } // check to see what happened, did we run out of reads? - if (!peekable.hasNext()) { + if (!iter.hasNext()) { return null; } // we're not out of unmapped reads, so increment our read cout this.readsTaken += readCount; - return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, peekable), readCount); + return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount); } @@ -356,77 +358,104 @@ public class SAMDataSource implements SimpleDataSource { } -class SAMIteratorPool extends ResourcePool { - /** - * Source information about the reads. - */ +class SAMIteratorPool extends ResourcePool { + /** Source information about the reads. */ protected Reads reads; - /** - * Is this a by-reads traversal or a by-locus? - */ + /** Is this a by-reads traversal or a by-locus? */ protected boolean byReads; - /** - * File header for the combined file. - */ + /** File header for the combined file. */ protected SAMFileHeader header; /** our log, which we want to capture anything from this class */ - protected static Logger logger = Logger.getLogger(SAMIteratorPool.class); + protected static Logger logger = Logger.getLogger(SAMIteratorPool.class); public SAMIteratorPool( Reads reads, boolean byReads ) { this.reads = reads; this.byReads = byReads; - SamFileHeaderMerger merger = createNewResource( null ); + SamFileHeaderMerger merger = createNewResource(null); this.header = merger.getMergedHeader(); // Add this resource to the pool. - this.addNewResource( merger ); + this.addNewResource(merger); } - /** - * Get the combined header for all files in the iterator pool. - */ + /** Get the combined header for all files in the iterator pool. */ public SAMFileHeader getHeader() { return header; } - protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List mergers) { - if( mergers.size() == 0 ) + protected SamFileHeaderMerger selectBestExistingResource( GenomeLoc position, List mergers ) { + if (mergers.size() == 0) return null; return mergers.get(0); } protected SamFileHeaderMerger createNewResource( GenomeLoc position ) { - return createHeaderMerger( reads, SAMFileHeader.SortOrder.coordinate ); + return createHeaderMerger(reads, SAMFileHeader.SortOrder.coordinate); } - protected StingSAMIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) { + protected QueryIterator createIteratorFromResource( GenomeLoc loc, SamFileHeaderMerger headerMerger ) { final MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2(headerMerger, reads); - if( loc != null ) { + if (loc != null) { if (byReads) iterator.queryContained(loc.getContig(), (int) loc.getStart(), (int) loc.getStop()); else iterator.queryOverlapping(loc.getContig(), (int) loc.getStart(), (int) loc.getStop()); } - return new StingSAMIterator() { - public Reads getSourceInfo() { return reads; } + return new QueryIterator() { + public Reads getSourceInfo() { + return reads; + } + public void close() { iterator.close(); release(this); } - public Iterator iterator() { return this; } - public boolean hasNext() { return iterator.hasNext(); } - public SAMRecord next() { return iterator.next(); } - public void remove() { throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); } + + public Iterator iterator() { + return this; + } + + public boolean hasNext() { + return iterator.hasNext(); + } + + public SAMRecord next() { + return iterator.next(); + } + + public void remove() { + throw new UnsupportedOperationException("Can't remove from a StingSAMIterator"); + } + + public SAMRecord peek() { + return iterator.peek(); + } + + public void queryOverlapping( String contig, int start, int stop ) { + iterator.queryOverlapping(contig, start, stop); + } + + public void query( String contig, int start, int stop, boolean contained ) { + iterator.query(contig, start, stop, contained); + } + + public void queryUnmappedReads() { + iterator.queryUnmappedReads(); + } + + public void queryContained( String contig, int start, int stop ) { + iterator.queryContained(contig, start, stop); + } }; } protected void closeResource( SamFileHeaderMerger resource ) { - for( SAMFileReader reader: resource.getReaders() ) + for (SAMFileReader reader : resource.getReaders()) reader.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index 3c55d71be..c3bdc5737 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -34,6 +34,7 @@ import net.sf.samtools.util.CloseableIterator; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.Utils; import java.lang.reflect.Constructor; import java.util.Comparator; @@ -46,7 +47,7 @@ import java.util.PriorityQueue; * iterable stream. The underlying iterators/files must all have the same sort order unless * the requested output format is unsorted, in which case any combination is valid. */ -public class MergingSamRecordIterator2 implements CloseableIterator, Iterable, PeekingStingIterator { +public class MergingSamRecordIterator2 implements CloseableIterator, Iterable, QueryIterator { protected PriorityQueue pq = null; protected final SamFileHeaderMerger samHeaderMerger; protected final SAMFileHeader.SortOrder sortOrder; @@ -54,6 +55,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, private SAMRecord mNextRecord; protected boolean initialized = false; protected final Reads reads; + protected boolean warnedUserAboutSortOrder = false; // so we only warn the user once /** * Constructs a new merging iterator with the same set of readers and sort order as @@ -62,7 +64,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, * @param headerMerger the header to merge * @param reads the reads pile */ - public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger, Reads reads) { + public MergingSamRecordIterator2( final SamFileHeaderMerger headerMerger, Reads reads ) { this.samHeaderMerger = headerMerger; this.reads = reads; this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); @@ -90,11 +92,24 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } - private void checkSortOrder(SAMFileReader reader) { + /** + * verify the sort order + * + * @param reader the reader to check + */ + private void checkSortOrder( SAMFileReader reader ) { if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder) { - throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() + - " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals " - + reader.getFileHeader().getAttribute("SO") + " in this case)."); + if (reads.getSafetyChecking()) { + throw new PicardException("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() + + " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals " + + reader.getFileHeader().getAttribute("SO") + " in this case)."); + } else if(!warnedUserAboutSortOrder) { + warnedUserAboutSortOrder = true; + Utils.warnUser("Files are not compatible with sort order: " + reader.getFileHeader().getSortOrder() + + " vrs " + this.sortOrder + ". Make sure that the SO flag in your bam file is set (The reader attribute for sort order equals " + + reader.getFileHeader().getAttribute("SO") + " in this case)."); + } + } } @@ -102,7 +117,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, return true; } - public void queryOverlapping(final String contig, final int start, final int stop) { + public void queryOverlapping( final String contig, final int start, final int stop ) { if (initialized) { throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); } @@ -118,7 +133,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } - public void query(final String contig, final int start, final int stop, final boolean contained) { + public void query( final String contig, final int start, final int stop, final boolean contained ) { if (initialized) { throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); } @@ -133,7 +148,22 @@ public class MergingSamRecordIterator2 implements CloseableIterator, } - public void queryContained(final String contig, final int start, final int stop) { + public void queryUnmappedReads() { + if (initialized) { + throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); + } + final SAMRecordComparator comparator = getComparator(); + for (final SAMFileReader reader : samHeaderMerger.getReaders()) { + Iterator recordIter = reader.queryUnmapped(); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + addIfNotEmpty(iterator); + } + setInitialized(); + + } + + + public void queryContained( final String contig, final int start, final int stop ) { if (initialized) { throw new IllegalStateException("You cannot double initialize a MergingSamRecordIterator2"); } @@ -237,7 +267,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, * * @param iterator the iterator to add */ - protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) { + protected void addIfNotEmpty( final ComparableSamRecordIterator iterator ) { //System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart()); if (iterator.hasNext()) { pq.offer(iterator); @@ -262,11 +292,11 @@ public class MergingSamRecordIterator2 implements CloseableIterator, // For unsorted build a fake comparator that compares based on object ID if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { return new SAMRecordComparator() { - public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { + public int fileOrderCompare( final SAMRecord lhs, final SAMRecord rhs ) { return System.identityHashCode(lhs) - System.identityHashCode(rhs); } - public int compare(final SAMRecord lhs, final SAMRecord rhs) { + public int compare( final SAMRecord lhs, final SAMRecord rhs ) { return fileOrderCompare(lhs, rhs); } }; @@ -345,13 +375,13 @@ class ComparableSamRecordIterator extends PeekableIterator implements * @param sam the SAM file to read records from * @param comparator the Comparator to use to provide ordering fo SAMRecords */ - public ComparableSamRecordIterator(final SAMFileReader sam, final Comparator comparator) { + public ComparableSamRecordIterator( final SAMFileReader sam, final Comparator comparator ) { super(sam.iterator()); this.reader = sam; this.comparator = comparator; } - public ComparableSamRecordIterator(final SAMFileReader sam, Iterator iterator, final Comparator comparator) { + public ComparableSamRecordIterator( final SAMFileReader sam, Iterator iterator, final Comparator comparator ) { super(iterator); // use the provided iterator this.reader = sam; this.comparator = comparator; @@ -381,7 +411,7 @@ class ComparableSamRecordIterator extends PeekableIterator implements * * @return a negative, 0 or positive number as described in the Comparator interface */ - public int compareTo(final ComparableSamRecordIterator that) { + public int compareTo( final ComparableSamRecordIterator that ) { if (this.comparator.getClass() != that.comparator.getClass()) { throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + "have different orderings internally"); diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/PeekingIterator.java similarity index 82% rename from java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java rename to java/src/org/broadinstitute/sting/gatk/iterators/PeekingIterator.java index 8abf05409..60c62aedb 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/PeekingStingIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/PeekingIterator.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.gatk.iterators; -import net.sf.samtools.SAMRecord; /* * Copyright (c) 2009 The Broad Institute @@ -30,14 +29,12 @@ import net.sf.samtools.SAMRecord; /** * @author aaron *

- * Interface Peekable + * Class PeekingIterator *

- * This interface indicates that + * a peekable interface, that requires a peek() method */ -public interface PeekingStingIterator extends StingSAMIterator { - /** - * peek, given the specified type - * @return - */ - SAMRecord peek(); +public interface PeekingIterator { + + /** @return returns a peeked value */ + public T peek(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java new file mode 100644 index 000000000..a4375b68e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/iterators/QueryIterator.java @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.gatk.iterators; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMRecordComparator; +import net.sf.samtools.SAMFileReader; + +import java.util.Iterator; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + * + * This interface indicates that the iterator is query able + */ +public interface QueryIterator extends StingSAMIterator, PeekingIterator { + + /** + * The required methods to query able + **/ + public void queryOverlapping(final String contig, final int start, final int stop); + public void query(final String contig, final int start, final int stop, final boolean contained); + public void queryUnmappedReads(); + public void queryContained(final String contig, final int start, final int stop); +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java index 533b69d93..0de487922 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadValidationWalker.java @@ -50,13 +50,13 @@ import java.math.BigInteger; * * A descriptions should go here. Blame aaron if it's missing. */ -public class ReadValidationWalker extends ReadWalker { +public class ReadValidationWalker extends ReadWalker { // our MD5 sum private MessageDigest m; // private list of md5sums - private final List list = new ArrayList(); + private final List list = new ArrayList(); /** * The initialize function. @@ -94,8 +94,8 @@ public class ReadValidationWalker extends ReadWalker { * bam file, if it was specified on the command line * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise */ - public Integer reduceInit() { - return new Integer(0); + public SAMRecord reduceInit() { + return null; } /** @@ -104,31 +104,16 @@ public class ReadValidationWalker extends ReadWalker { * @param output the output source * @return the SAMFileWriter, so that the next reduce can emit to the same source */ - public Integer reduce( SAMRecord read, Integer output ) { - byte[] ray = new byte[read.getReadName().length() + 5]; - int x = 0; - for (char c: read.getReadName().toCharArray()) { - ray[x] = (byte)c; - //System.err.println("adding " + c + " to pos " + x); - x++; + public SAMRecord reduce( SAMRecord read, SAMRecord output ) { + if (output == null) + return read; + if ((read.getReferenceIndex() == output.getReferenceIndex()) && (read.getAlignmentStart() < output.getAlignmentStart())) { + System.err.println("saw the read " + read.getReadName() + " duplicated, old alignment = " + output.getAlignmentStart()); } - //System.err.println(read.getReadName() + " name, alignment = " + read.getAlignmentStart()); - int y = 0; - for (;y < 4; y++) { - ray[x+y] = (byte)((read.getAlignmentStart() >> y * 8) & 0x000f); + else if (read.getReferenceIndex() != output.getReferenceIndex()){ + System.err.println("Switching Chromo"); } - ray[x+y] = read.getSecondOfPairFlag() ? (byte)1 : (byte)0; - BigInteger bigInt = new BigInteger(m.digest(ray)); - //System.err.println(bigInt.toString()); - m.reset(); - if (this.list.contains(bigInt)) { - throw new StingException("Seen Read: " + bigInt + "-> " + read.getReadName() + " before (list size = " + list.size() + ")"); - } - if (read.getAlignmentStart() < output) { - throw new StingException("Seen Read " + read.getReadName() + " has alignment of " + read.getAlignmentStart() + " before (list size = " + output + ")"); - } - list.add(bigInt); - return read.getAlignmentStart(); + return read; } diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java index 41d3cd62a..4f9d12bbf 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMIterator.java @@ -1,9 +1,12 @@ package org.broadinstitute.sting.utils.sam; -import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator; +import org.broadinstitute.sting.gatk.iterators.QueryIterator; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.Reads; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.util.CloseableIterator; +import net.sf.picard.util.PeekableIterator; import java.util.Iterator; @@ -34,7 +37,7 @@ import java.util.Iterator; */ /** this fake iterator allows us to look at how specific piles of reads are handled */ -public class ArtificialSAMIterator implements PeekingStingIterator { +public class ArtificialSAMIterator implements StingSAMIterator { protected int currentChromo = 0; diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java index fdf7771a3..a9ff96a62 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMQueryIterator.java @@ -7,6 +7,7 @@ import net.sf.samtools.SAMRecord; import java.util.List; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.gatk.iterators.QueryIterator; /* @@ -41,7 +42,7 @@ import org.broadinstitute.sting.utils.StingException; * to test out classes that use specific itervals. The reads returned will * all lie in order in the specified interval. */ -public class ArtificialSAMQueryIterator extends ArtificialSAMIterator { +public class ArtificialSAMQueryIterator extends ArtificialSAMIterator implements QueryIterator { // get the next positon protected int finalPos = 0; @@ -88,6 +89,39 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator { initialize(contig, start, stop); } + @Override + public void query( String contig, int start, int stop, boolean contained ) { + if (contained) + queryContained(contig, start, stop); + else + queryOverlapping(contig, start, stop); + } + + @Override + public void queryUnmappedReads() { + initializeUnmapped(); + } + + + /** + * initialize the iterator to an unmapped read position + */ + public void initializeUnmapped() { + ensureUntouched(); + while (super.hasNext() && this.peek().getReferenceIndex() >= 0) { + super.next(); + } + // sanity check that we have an actual matching read next + SAMRecord rec = this.peek(); + if (rec == null) { + throw new StingException("The next read doesn't match"); + } + // set the seeked variable to true + seeked = true; + } + + + /** * initialize the query iterator @@ -124,7 +158,7 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator { // sanity check that we have an actual matching read next SAMRecord rec = this.peek(); if (!matches(rec)) { - throw new StingException("The next read doesn't match"); + throw new StingException("The next read doesn't match"); } // set the seeked variable to true seeked = true; @@ -141,6 +175,11 @@ public class ArtificialSAMQueryIterator extends ArtificialSAMIterator { if (rec.getReferenceIndex() != this.contigIndex) { return false; } + // if we have an unmapped read, matching the contig is good enough for us + if (rec.getReferenceIndex() < 0) { + return true; + } + if (!overlapping) { // if the start or the end are somewhere within our range if (( rec.getAlignmentStart() >= startPos && rec.getAlignmentEnd() <= finalPos )) { diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 38704d138..6c9032909 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -5,8 +5,7 @@ import net.sf.samtools.*; import java.io.File; import java.util.*; -import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator; -import org.broadinstitute.sting.gatk.Reads; +import org.broadinstitute.sting.gatk.iterators.QueryIterator; import org.broadinstitute.sting.utils.StingException; /** @@ -103,6 +102,51 @@ public class ArtificialSAMUtils { return header; } + /** + * setup a default read group for a SAMFileHeader + * + * @param header the header to set + * @param readGroupID the read group ID tag + * @param sampleName the sample name + * + * @return the adjusted SAMFileHeader + */ + public static SAMFileHeader createDefaultReadGroup( SAMFileHeader header, String readGroupID, String sampleName ) { + SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupID); + rec.setSample(sampleName); + List readGroups = new ArrayList(); + readGroups.add(rec); + header.setReadGroups(readGroups); + return header; + } + + /** + * setup read groups for the specified read groups and sample names + * + * @param header the header to set + * @param readGroupIDs the read group ID tags + * @param sampleNames the sample names + * + * @return the adjusted SAMFileHeader + */ + public static SAMFileHeader createEnumeratedReadGroups( SAMFileHeader header, List readGroupIDs, List sampleNames ) { + if (readGroupIDs.size() != sampleNames.size()) { + throw new StingException("read group count and sample name count must be the same"); + } + + List readGroups = new ArrayList(); + + int x = 0; + for (; x < readGroupIDs.size(); x++) { + SAMReadGroupRecord rec = new SAMReadGroupRecord(readGroupIDs.get(x)); + rec.setSample(sampleNames.get(x)); + readGroups.add(rec); + } + header.setReadGroups(readGroups); + return header; + } + + /** * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * @@ -115,7 +159,7 @@ public class ArtificialSAMUtils { * @return the artificial read */ public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) { - if( alignmentStart == 0 ) + if (alignmentStart == 0) throw new StingException("Invalid alignment start for artificial read"); SAMRecord record = new SAMRecord(header); record.setReadName(name); @@ -128,20 +172,26 @@ public class ArtificialSAMUtils { return record; } - /** - * create an iterator containing the specified read piles + * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * - * @param startingChr the chromosome (reference ID) to start from - * @param endingChr the id to end with - * @param readCount the number of reads per chromosome + * @param header the SAM header to associate the read with + * @param name the name of the read + * @param refIndex the reference index, i.e. what chromosome to associate it with + * @param alignmentStart where to start the alignment + * @param bases the sequence of the read + * @param qual the qualities of the read * - * @return StingSAMIterator representing the specified amount of fake data + * @return the artificial read */ - public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) { - SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - - return new ArtificialSAMIterator(startingChr, endingChr, readCount, header); + public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { + if (bases.length != qual.length) { + throw new StingException("Passed in read string is different length then the quality array"); + } + SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); + rec.setReadBases(bases); + rec.setBaseQualities(bases); + return rec; } /** @@ -150,14 +200,29 @@ public class ArtificialSAMUtils { * @param startingChr the chromosome (reference ID) to start from * @param endingChr the id to end with * @param readCount the number of reads per chromosome + * + * @return StingSAMIterator representing the specified amount of fake data + */ + public static QueryIterator unmappedReadIterator( int startingChr, int endingChr, int readCount ) { + SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + + return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header); + } + + /** + * create an iterator containing the specified read piles + * + * @param startingChr the chromosome (reference ID) to start from + * @param endingChr the id to end with + * @param readCount the number of reads per chromosome * @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file * * @return StingSAMIterator representing the specified amount of fake data */ - public static PeekingStingIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { + public static QueryIterator unmappedReadIterator( int startingChr, int endingChr, int readCount, int unmappedReadCount ) { SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - return new ArtificialSAMIterator(startingChr, endingChr, readCount, unmappedReadCount, header); + return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); } /** @@ -171,16 +236,16 @@ public class ArtificialSAMUtils { */ public static ArtificialSAMQueryIterator queryReadIterator( int startingChr, int endingChr, int readCount ) { SAMFileHeader header = createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); - + return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, 0, header); } /** * create an ArtificialSAMQueryIterator containing the specified read piles * - * @param startingChr the chromosome (reference ID) to start from - * @param endingChr the id to end with - * @param readCount the number of reads per chromosome + * @param startingChr the chromosome (reference ID) to start from + * @param endingChr the id to end with + * @param readCount the number of reads per chromosome * @param unmappedReadCount the count of unmapped reads to place at the end of the iterator, like in a sorted bam file * * @return StingSAMIterator representing the specified amount of fake data diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java index 07d5b58d6..c45d5bb1e 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMByReadsTest.java @@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.datasources.simpleDataSources; import static junit.framework.Assert.fail; import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; @@ -84,7 +83,7 @@ public class SAMByReadsTest extends BaseTest { SAMDataSource data = new SAMDataSource(reads,true); for (int x = 0; x < 10; x++) { ++iterations; - PeekingStingIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000); + QueryIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 10, 1000); BoundedReadIterator ret = data.toUnmappedReads(100, iter); // count the reads we've gotten back if (ret == null) { @@ -173,7 +172,7 @@ public class SAMByReadsTest extends BaseTest { while (shardStrategy.hasNext()) { - BoundedReadIterator ret = (BoundedReadIterator)data.seek(shardStrategy.next()); + StingSAMIterator ret = data.seek(shardStrategy.next()); assertTrue(ret != null); while (ret.hasNext()) { ret.next(); @@ -227,7 +226,7 @@ class ArtificialResourcePool extends SAMIteratorPool { } @Override - public StingSAMIterator iterator( GenomeLoc loc ) { + public QueryIterator iterator( GenomeLoc loc ) { ArtificialSAMQueryIterator iter = ArtificialSAMUtils.queryReadIterator(1, 10, 100, 1000); if (loc != null) { iter.queryContained(loc.getContig(), (int)loc.getStart(), (int)loc.getStop()); diff --git a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java index 9768d9258..2275084a7 100644 --- a/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java +++ b/java/test/org/broadinstitute/sting/utils/sam/ArtificialSAMUtilsTest.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.sam; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; -import org.broadinstitute.sting.gatk.iterators.PeekingStingIterator; +import org.broadinstitute.sting.gatk.iterators.QueryIterator; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -80,30 +80,5 @@ public class ArtificialSAMUtilsTest extends BaseTest { assertEquals(1000, count); } - @Test - public void testPeeking() { - PeekingStingIterator iter = ArtificialSAMUtils.unmappedReadIterator(1, 100, 100); - int count = 0; - while (iter.hasNext()) { - int readCnt = ((ArtificialSAMIterator)(iter)).readsTaken(); - - // peek the record - SAMRecord rec = iter.peek(); - assertTrue(rec.getReferenceIndex() >= 0); - - // next the record - SAMRecord rec2 = iter.next(); - assertTrue(rec2.getReadName() == rec.getReadName()); - assertTrue(rec2.getAlignmentStart() == rec.getAlignmentStart()); - - // find out how many reads we've taken now - int readCnt2 = ((ArtificialSAMIterator)(iter)).readsTaken(); - - count++; - if (count < 100*100) assertEquals(readCnt + 1, readCnt2); - else assertEquals(readCnt, readCnt2); - } - assertEquals(100 * 100, count ); - - } + }