Added ability to sort reads on the fly
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@83 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0362cb9e59
commit
1aa3958644
|
|
@ -26,6 +26,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
@Option(shortName="DBSNP", doc="DBSNP file", optional=true) public String DBSNP_FILE = null;
|
||||
@Option(shortName="THREADED_IO", doc="If true, enables threaded I/O operations", optional=true) public String ENABLED_THREADED_IO = "false";
|
||||
@Option(shortName="U", doc="If true, enables unsafe operations, nothing will be checked at runtime. You better know what you are doing if you set this flag.", optional=false) public String UNSAFE = "false";
|
||||
@Option(shortName="SORT_ON_FLY", doc="If true, enables on fly sorting of reads file.", optional=false) public String ENABLED_SORT_ON_FLY = "false";
|
||||
|
||||
public static HashMap<String, Object> MODULES = new HashMap<String,Object>();
|
||||
public static void addModule(final String name, final Object walker) {
|
||||
|
|
@ -103,6 +104,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
}
|
||||
|
||||
engine.setSafetyChecking(! UNSAFE.toLowerCase().equals("true"));
|
||||
// Sort on the fly only when the option is explicitly "true"; no negation here,
// otherwise the default value "false" would switch sorting on.
engine.setSortOnFly(ENABLED_SORT_ON_FLY.toLowerCase().equals("true"));
|
||||
|
||||
engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true"));
|
||||
//engine.testReference();
|
||||
|
|
|
|||
|
|
@ -77,6 +77,8 @@ public class TraversalEngine {
|
|||
|
||||
public boolean DEBUGGING = false;
|
||||
public boolean beSafeP = true;
|
||||
public boolean SORT_ON_FLY = false;
|
||||
public int MAX_ON_FLY_SORTS = 100000;
|
||||
public long N_RECORDS_TO_PRINT = 100000;
|
||||
public int THREADED_IO_BUFFER_SIZE = 10000;
|
||||
|
||||
|
|
@ -117,6 +119,11 @@ public class TraversalEngine {
|
|||
System.out.printf("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...%n");
|
||||
this.beSafeP = beSafeP;
|
||||
}
|
||||
public void setSortOnFly( final boolean SORT_ON_FLY ) {
|
||||
if ( SORT_ON_FLY )
|
||||
System.out.println("Sorting read file on the fly: max reads allowed is " + MAX_ON_FLY_SORTS);
|
||||
this.SORT_ON_FLY = SORT_ON_FLY;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -306,9 +313,11 @@ public class TraversalEngine {
|
|||
throw new RuntimeIOException(ex);
|
||||
}
|
||||
|
||||
if ( beSafeP )
|
||||
if ( SORT_ON_FLY )
|
||||
samReadIter = new SortSamIterator(samReadIter, MAX_ON_FLY_SORTS);
|
||||
else if ( beSafeP )
|
||||
samReadIter = new VerifyingSamIterator(samReadIter);
|
||||
|
||||
|
||||
if ( THREADED_IO ) {
|
||||
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
||||
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
||||
|
|
@ -455,7 +464,7 @@ public class TraversalEngine {
|
|||
}
|
||||
|
||||
public void verifySortOrder(final boolean requiresSortedOrder) {
|
||||
if ( beSafeP && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate ) {
|
||||
if ( beSafeP && !SORT_ON_FLY && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate ) {
|
||||
final String msg = "SAM file is not sorted in coordinate order (according to header) Walker type with given arguments requires a sorted file for correct processing";
|
||||
if ( requiresSortedOrder || strictness == SAMFileReader.ValidationStringency.STRICT )
|
||||
throw new RuntimeIOException(msg);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,64 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mdepristo
|
||||
* Date: Mar 15, 2009
|
||||
* Time: 6:02:31 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SortSamIterator implements Iterator<SAMRecord> {
|
||||
|
||||
Iterator<ComparableSAMRecord> it;
|
||||
|
||||
public SortSamIterator(Iterator<SAMRecord> unsortedIter, int maxSorts) {
|
||||
|
||||
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||
while (unsortedIter.hasNext()) {
|
||||
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||
// choose an arbitrary length to limit sorting for now
|
||||
if (list.size() > maxSorts)
|
||||
throw new UnsupportedOperationException("Can not sort files with more than 100K reads on the fly!");
|
||||
}
|
||||
Collections.sort(list);
|
||||
it = list.iterator();
|
||||
}
|
||||
|
||||
public boolean hasNext() { return it.hasNext(); }
|
||||
public SAMRecord next() { return it.next().getRecord(); }
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
|
||||
|
||||
private SAMRecord record;
|
||||
|
||||
public ComparableSAMRecord(SAMRecord record) {
|
||||
this.record = record;
|
||||
}
|
||||
|
||||
public SAMRecord getRecord() {
|
||||
return record;
|
||||
}
|
||||
|
||||
public int compareTo(ComparableSAMRecord o) {
|
||||
GenomeLoc myLoc = Utils.genomicLocationOf(record);
|
||||
GenomeLoc hisLoc = Utils.genomicLocationOf(o.getRecord());
|
||||
return myLoc.compareTo(hisLoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue