Added ability to sort reads on the fly
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@83 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0362cb9e59
commit
1aa3958644
|
|
@ -26,6 +26,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
@Option(shortName="DBSNP", doc="DBSNP file", optional=true) public String DBSNP_FILE = null;
|
@Option(shortName="DBSNP", doc="DBSNP file", optional=true) public String DBSNP_FILE = null;
|
||||||
@Option(shortName="THREADED_IO", doc="If true, enables threaded I/O operations", optional=true) public String ENABLED_THREADED_IO = "false";
|
@Option(shortName="THREADED_IO", doc="If true, enables threaded I/O operations", optional=true) public String ENABLED_THREADED_IO = "false";
|
||||||
@Option(shortName="U", doc="If true, enables unsafe operations, nothing will be checked at runtime. You better know what you are doing if you set this flag.", optional=false) public String UNSAFE = "false";
|
@Option(shortName="U", doc="If true, enables unsafe operations, nothing will be checked at runtime. You better know what you are doing if you set this flag.", optional=false) public String UNSAFE = "false";
|
||||||
|
@Option(shortName="SORT_ON_FLY", doc="If true, enables on fly sorting of reads file.", optional=false) public String ENABLED_SORT_ON_FLY = "false";
|
||||||
|
|
||||||
public static HashMap<String, Object> MODULES = new HashMap<String,Object>();
|
public static HashMap<String, Object> MODULES = new HashMap<String,Object>();
|
||||||
public static void addModule(final String name, final Object walker) {
|
public static void addModule(final String name, final Object walker) {
|
||||||
|
|
@ -103,6 +104,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
engine.setSafetyChecking(! UNSAFE.toLowerCase().equals("true"));
|
engine.setSafetyChecking(! UNSAFE.toLowerCase().equals("true"));
|
||||||
|
// Sort on the fly only when SORT_ON_FLY is "true". Unlike the UNSAFE flag, which switches a check *off*,
// this flag switches a feature *on*, so the comparison must not be negated.
engine.setSortOnFly(ENABLED_SORT_ON_FLY.toLowerCase().equals("true"));
|
||||||
|
|
||||||
engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true"));
|
engine.initialize(ENABLED_THREADED_IO.toLowerCase().equals("true"));
|
||||||
//engine.testReference();
|
//engine.testReference();
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,8 @@ public class TraversalEngine {
|
||||||
|
|
||||||
public boolean DEBUGGING = false;
|
public boolean DEBUGGING = false;
|
||||||
public boolean beSafeP = true;
|
public boolean beSafeP = true;
|
||||||
|
public boolean SORT_ON_FLY = false;
|
||||||
|
public int MAX_ON_FLY_SORTS = 100000;
|
||||||
public long N_RECORDS_TO_PRINT = 100000;
|
public long N_RECORDS_TO_PRINT = 100000;
|
||||||
public int THREADED_IO_BUFFER_SIZE = 10000;
|
public int THREADED_IO_BUFFER_SIZE = 10000;
|
||||||
|
|
||||||
|
|
@ -117,6 +119,11 @@ public class TraversalEngine {
|
||||||
System.out.printf("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...%n");
|
System.out.printf("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages...%n");
|
||||||
this.beSafeP = beSafeP;
|
this.beSafeP = beSafeP;
|
||||||
}
|
}
|
||||||
|
public void setSortOnFly( final boolean SORT_ON_FLY ) {
|
||||||
|
if ( SORT_ON_FLY )
|
||||||
|
System.out.println("Sorting read file on the fly: max reads allowed is " + MAX_ON_FLY_SORTS);
|
||||||
|
this.SORT_ON_FLY = SORT_ON_FLY;
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -306,9 +313,11 @@ public class TraversalEngine {
|
||||||
throw new RuntimeIOException(ex);
|
throw new RuntimeIOException(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( beSafeP )
|
if ( SORT_ON_FLY )
|
||||||
|
samReadIter = new SortSamIterator(samReadIter, MAX_ON_FLY_SORTS);
|
||||||
|
else if ( beSafeP )
|
||||||
samReadIter = new VerifyingSamIterator(samReadIter);
|
samReadIter = new VerifyingSamIterator(samReadIter);
|
||||||
|
|
||||||
if ( THREADED_IO ) {
|
if ( THREADED_IO ) {
|
||||||
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
System.out.printf("Enabling threaded I/O with buffer of %d reads%n", THREADED_IO_BUFFER_SIZE);
|
||||||
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
samReadIter = new ThreadedIterator<SAMRecord>(samReadIter, THREADED_IO_BUFFER_SIZE);
|
||||||
|
|
@ -455,7 +464,7 @@ public class TraversalEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void verifySortOrder(final boolean requiresSortedOrder) {
|
public void verifySortOrder(final boolean requiresSortedOrder) {
|
||||||
if ( beSafeP && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate ) {
|
if ( beSafeP && !SORT_ON_FLY && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate ) {
|
||||||
final String msg = "SAM file is not sorted in coordinate order (according to header) Walker type with given arguments requires a sorted file for correct processing";
|
final String msg = "SAM file is not sorted in coordinate order (according to header) Walker type with given arguments requires a sorted file for correct processing";
|
||||||
if ( requiresSortedOrder || strictness == SAMFileReader.ValidationStringency.STRICT )
|
if ( requiresSortedOrder || strictness == SAMFileReader.ValidationStringency.STRICT )
|
||||||
throw new RuntimeIOException(msg);
|
throw new RuntimeIOException(msg);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: mdepristo
|
||||||
|
* Date: Mar 15, 2009
|
||||||
|
* Time: 6:02:31 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class SortSamIterator implements Iterator<SAMRecord> {
|
||||||
|
|
||||||
|
Iterator<ComparableSAMRecord> it;
|
||||||
|
|
||||||
|
public SortSamIterator(Iterator<SAMRecord> unsortedIter, int maxSorts) {
|
||||||
|
|
||||||
|
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||||
|
while (unsortedIter.hasNext()) {
|
||||||
|
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||||
|
// choose an arbitrary length to limit sorting for now
|
||||||
|
if (list.size() > maxSorts)
|
||||||
|
throw new UnsupportedOperationException("Can not sort files with more than 100K reads on the fly!");
|
||||||
|
}
|
||||||
|
Collections.sort(list);
|
||||||
|
it = list.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() { return it.hasNext(); }
|
||||||
|
public SAMRecord next() { return it.next().getRecord(); }
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||||
|
}
|
||||||
|
|
||||||
|
private class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
|
||||||
|
|
||||||
|
private SAMRecord record;
|
||||||
|
|
||||||
|
public ComparableSAMRecord(SAMRecord record) {
|
||||||
|
this.record = record;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord getRecord() {
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareTo(ComparableSAMRecord o) {
|
||||||
|
GenomeLoc myLoc = Utils.genomicLocationOf(record);
|
||||||
|
GenomeLoc hisLoc = Utils.genomicLocationOf(o.getRecord());
|
||||||
|
return myLoc.compareTo(hisLoc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue