diff --git a/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java index c5b996ffd..8515a2138 100755 --- a/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/TraversalEngine.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk; import edu.mit.broad.picard.filter.FilteringIterator; import edu.mit.broad.picard.filter.SamRecordFilter; import edu.mit.broad.picard.reference.ReferenceSequence; +import edu.mit.broad.picard.sam.SamFileHeaderMerger; import net.sf.functionalj.Function1; import net.sf.functionalj.FunctionN; import net.sf.functionalj.Functions; @@ -327,6 +328,9 @@ public class TraversalEngine { protected void printOnTraversalDone(final String type, T sum) { printProgress(true, type, null); System.out.println("Traversal reduce result is " + sum); // TODO: fixme -- how do we use this logger? + final long curTime = System.currentTimeMillis(); + final double elapsed = (curTime - startTime) / 1000.0; + logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours%n", elapsed, elapsed / 60, elapsed / 3600)); logger.info(String.format("Traversal skipped %d reads out of %d total (%.2f%%)", nSkippedReads, nReads, (nSkippedReads * 100.0) / nReads)); logger.info(String.format(" -> %d unmapped reads", nUnmappedReads)); logger.info(String.format(" -> %d non-primary reads", nNotPrimary)); @@ -361,6 +365,23 @@ public class TraversalEngine { private Iterator getReadsIterator(final SAMFileReader samReader) { // If the file has an index, querying functions are available. Use them if possible... + if ( samReader == null && readsFile.toString().endsWith(".list") ) { + SAMFileHeader.SortOrder SORT_ORDER = SAMFileHeader.SortOrder.coordinate; + + List readers = new ArrayList(); + try { + for ( String fileName : new xReadLines(readsFile) ) { + SAMFileReader reader = initializeSAMFile(new File(fileName)); + readers.add(reader); + } + + SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers, SORT_ORDER); + return new MergingSamRecordIterator2(headerMerger); + } + catch ( FileNotFoundException e ) { + logger.fatal("Couldn't open file in sam file list: " + readsFile); + } + } if (samReader.hasIndex()) { return new SamQueryIterator(samReader, locs); } else { @@ -386,13 +407,19 @@ public class TraversalEngine { } private SAMFileReader initializeSAMFile(final File samFile) { - SAMFileReader samReader = new SAMFileReader(samFile, true); - samReader.setValidationStringency(strictness); + // todo: fixme, this is a hack to try out dynamic merging + if ( samFile.toString().endsWith(".list") ) { + return null; + // todo: omg, this is just scary, just it's just for testing purposes. fix with the new DataSource system + } else { + SAMFileReader samReader = new SAMFileReader(samFile, true); + samReader.setValidationStringency(strictness); - final SAMFileHeader header = samReader.getFileHeader(); - logger.info(String.format("Sort order is: " + header.getSortOrder())); + final SAMFileHeader header = samReader.getFileHeader(); + logger.info(String.format("Sort order is: " + header.getSortOrder())); - return samReader; + return samReader; + } } // cleaning up past mistakes diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java new file mode 100644 index 000000000..038cfb5e2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -0,0 +1,240 @@ +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. +* Neither the Broad Institute nor MIT can be responsible for its use, misuse, or +* functionality. +*/ +package org.broadinstitute.sting.gatk.iterators; + +import edu.mit.broad.picard.sam.SamFileHeaderMerger; +import edu.mit.broad.picard.sam.ReservedTagConstants; +import edu.mit.broad.picard.PicardException; +import edu.mit.broad.picard.util.PeekableIterator; +import net.sf.samtools.*; + +/* +* The Broad Institute +* SOFTWARE COPYRIGHT NOTICE AGREEMENT +* This software and its documentation are copyright 2009 by the +* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. +* +* This software is supplied without any warranty or guaranteed support whatsoever. +* Neither the Broad Institute nor MIT can be responsible for its use, misuse, or +* functionality. +*/ +import java.util.*; +import java.lang.reflect.Constructor; + +import org.broadinstitute.sting.playground.gatk.iterators.SeekableSamIteration; + +/** + * Provides an iterator interface for merging multiple underlying iterators into a single + * iterable stream. The underlying iterators/files must all have the same sort order unless + * the requested output format is unsorted, in which case any combination is valid. + */ +public class MergingSamRecordIterator2 implements Iterator, SeekableSamIteration { + protected PriorityQueue pq; + protected final SamFileHeaderMerger samHeaderMerger; + protected final SAMFileHeader.SortOrder sortOrder; + + /** + * Constructs a new merging iterator with the same set of readers and sort order as + * provided by the header merger parameter. + */ + public MergingSamRecordIterator2(final SamFileHeaderMerger headerMerger) { + this.samHeaderMerger = headerMerger; + this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); + initializePQ(); + + final SAMRecordComparator comparator = getComparator(); + for (final SAMFileReader reader : samHeaderMerger.getReaders()) { + if (this.sortOrder != SAMFileHeader.SortOrder.unsorted && reader.getFileHeader().getSortOrder() != this.sortOrder){ + throw new PicardException("Files are not compatible with sort order: " + this.sortOrder ); + } + + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, comparator); + addIfNotEmpty(iterator); + } + } + + protected void initializePQ() { + this.pq = new PriorityQueue(samHeaderMerger.getReaders().size()); + } + + public boolean supportsSeeking() { return true; } + + public void queryOverlapping( final String contig, final int start, final int stop ) { + initializePQ(); // reinitialize the system + final SAMRecordComparator comparator = getComparator(); + + for (final SAMFileReader reader : samHeaderMerger.getReaders()) { + Iterator recordIter = reader.queryOverlapping( contig, start, stop); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + addIfNotEmpty(iterator); + } + } + + public void query(final String contig, final int start, final int stop, final boolean contained) { + initializePQ(); // reinitialize the system + final SAMRecordComparator comparator = getComparator(); + for (final SAMFileReader reader : samHeaderMerger.getReaders()) { + Iterator recordIter = reader.query( contig, start, stop, contained); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + addIfNotEmpty(iterator); + } + } + + public void queryContained(final String contig, final int start, final int stop) { + initializePQ(); // reinitialize the system + final SAMRecordComparator comparator = getComparator(); + for (final SAMFileReader reader : samHeaderMerger.getReaders()) { + Iterator recordIter = reader.queryContained( contig, start, stop ); + final ComparableSamRecordIterator iterator = new ComparableSamRecordIterator(reader, recordIter, comparator); + addIfNotEmpty(iterator); + } + } + + /** Returns true if any of the underlying iterators has more records, otherwise false. */ + public boolean hasNext() { + return !this.pq.isEmpty(); + } + + /** Returns the next record from the top most iterator during merging. */ + public SAMRecord next() { + final ComparableSamRecordIterator iterator = this.pq.poll(); + final SAMRecord record = iterator.next(); + addIfNotEmpty(iterator); + + if (this.samHeaderMerger.hasGroupIdDuplicates()) { + final String id = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); + final String newId = this.samHeaderMerger.getReadGroupId(iterator.getReader(), id); + record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newId); + } + final String oldProgramGroupId = (String) record.getAttribute(SAMTag.PG.toString()); + if (oldProgramGroupId != null) { + final String newProgramGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader(), oldProgramGroupId); + record.setAttribute(SAMTag.PG.toString(), newProgramGroupId); + } + + //System.out.printf("NEXT = %s %s %d%n", record.getReadName(), record.getReferenceName(), record.getAlignmentStart()); + //System.out.printf("PEEK = %s %s %d%n", this.pq.peek().peek().getReadName(), this.pq.peek().peek().getReferenceName(), this.pq.peek().peek().getAlignmentStart()); + return record; + } + + /** + * Adds iterator to priority queue. If the iterator has more records it is added + * otherwise it is closed and not added. + */ + protected void addIfNotEmpty(final ComparableSamRecordIterator iterator) { + //System.out.printf("Adding %s %s %d%n", iterator.peek().getReadName(), iterator.peek().getReferenceName(), iterator.peek().getAlignmentStart()); + if (iterator.hasNext()) { + pq.offer(iterator); + } + else { + iterator.close(); + } + } + + /** Unsupported operation. */ + public void remove() { + throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()"); + } + + /** + * Get the right comparator for a given sort order (coordinate, alphabetic). In the + * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive + * ordering. + */ + protected SAMRecordComparator getComparator() { + // For unsorted build a fake comparator that compares based on object ID + if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) { + return new SAMRecordComparator() { + public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) { + return System.identityHashCode(lhs) - System.identityHashCode(rhs); + } + + public int compare(final SAMRecord lhs, final SAMRecord rhs) { + return fileOrderCompare(lhs, rhs); + } + }; + } + + // Otherwise try and figure out what kind of comparator to return and build it + final Class type = this.sortOrder.getComparator(); + + try { + final Constructor ctor = type.getConstructor(SAMFileHeader.class); + //System.out.printf("Getting comparator %s%n", ctor.toGenericString()); + return ctor.newInstance(this.samHeaderMerger.getMergedHeader()); + } + catch (Exception e) { + try { + final Constructor ctor = type.getConstructor(); + return ctor.newInstance(); + } + catch (Exception e2) { + throw new PicardException("Could not instantiate a comparator for sort order: " + this.sortOrder, e2); + } + } + } + + /** Returns the merged header that the merging iterator is working from. */ + public SAMFileHeader getMergedHeader() { + return this.samHeaderMerger.getMergedHeader(); + } +} + +// Should replace picard class with the same name +class ComparableSamRecordIterator extends PeekableIterator implements Comparable { + private final Comparator comparator; + private final SAMFileReader reader; + + /** + * Constructs an iterator for iteration over the supplied SAM file that will be + * able to compare itself to other ComparableSAMRecordIterator instances using + * the supplied comparator for ordering SAMRecords. + * + * @param sam the SAM file to read records from + * @param comparator the Comparator to use to provide ordering fo SAMRecords + */ + public ComparableSamRecordIterator(final SAMFileReader sam, final Comparator comparator) { + super(sam.iterator()); + this.reader = sam; + this.comparator = comparator; + } + + public ComparableSamRecordIterator(final SAMFileReader sam, Iterator iterator, final Comparator comparator) { + super(iterator); // use the provided iterator + this.reader = sam; + this.comparator = comparator; + } + + /** Returns the reader from which this iterator was constructed. */ + public SAMFileReader getReader() { + return reader; + } + + /** + * Compares this iterator to another comparable iterator based on the next record + * available in each iterator. If the two comparable iterators have different + * comparator types internally an exception is thrown. + * + * @param that another iterator to compare to + * @return a negative, 0 or positive number as described in the Comparator interface + */ + public int compareTo(final ComparableSamRecordIterator that) { + if (this.comparator.getClass() != that.comparator.getClass()) { + throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + + "have different orderings internally"); + } + + final SAMRecord record = this.peek(); + final SAMRecord record2 = that.peek(); + //System.out.printf("Comparing %s vs. %s => %d%n", record.getReadName(), record2.getReadName(), comparator.compare(record, record2)); + return comparator.compare(record, record2); + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/iterators/SeekableSamIteration.java b/java/src/org/broadinstitute/sting/playground/gatk/iterators/SeekableSamIteration.java new file mode 100644 index 000000000..fba934d02 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/iterators/SeekableSamIteration.java @@ -0,0 +1,21 @@ +package org.broadinstitute.sting.playground.gatk.iterators; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecordComparator; + +import java.util.Iterator; + +/** + * Created by IntelliJ IDEA. + * User: mdepristo + * Date: Mar 26, 2009 + * Time: 9:27:59 AM + * To change this template use File | Settings | File Templates. + */ +public interface SeekableSamIteration { + public boolean supportsSeeking(); + public void queryOverlapping( final String contig, final int start, final int stop ); + public void query(final String contig, final int start, final int stop, final boolean contained); + public void queryContained(final String contig, final int start, final int stop); +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/iterators/SeekableSamIterator.java b/java/src/org/broadinstitute/sting/playground/gatk/iterators/SeekableSamIterator.java new file mode 100755 index 000000000..54937dd8b --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/iterators/SeekableSamIterator.java @@ -0,0 +1,43 @@ +package org.broadinstitute.sting.playground.gatk.iterators; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMFileReader; + +import java.util.Iterator; + +import org.broadinstitute.sting.playground.gatk.iterators.SeekableSamIteration; + +/** + * Created by IntelliJ IDEA. + * User: depristo + * Date: Feb 24, 2009 + * Time: 10:24:38 AM + * To change this template use File | Settings | File Templates. + */ +public class SeekableSamIterator implements Iterator, SeekableSamIteration { + protected Iterator it; + protected SAMFileReader reader; + + public SeekableSamIterator(Iterator it, SAMFileReader reader) { + this.it = it; + this.reader = reader; + } + + public boolean supportsSeeking() { return true; } + + public void queryOverlapping( final String contig, final int start, final int stop ) { + this.it = reader.queryOverlapping( contig, start, stop ); + } + + public void query(final String contig, final int start, final int stop, final boolean contained) { + this.it = reader.query( contig, start, stop, contained ); + } + + public void queryContained(final String contig, final int start, final int stop) { + this.it = reader.queryContained( contig, start, stop ); + } + + public boolean hasNext() { return it.hasNext(); } + public SAMRecord next() { return it.next(); } + public void remove () { it.remove(); } +} \ No newline at end of file