Moved the iterators over to the StingSAMIterator interface. This will help us ensure that iterators that need to be closed get closed.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@702 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-14 16:52:18 +00:00
parent 6d98234555
commit 7aa90757ac
10 changed files with 194 additions and 51 deletions

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import java.util.Iterator;
@ -40,7 +39,7 @@ public class BoundedReadIterator implements StingSAMIterator {
private long currentCount = 0;
// the iterator we want to decorate
private final CloseableIterator<SAMRecord> iterator;
private final StingSAMIterator iterator;
// our unmapped read flag
private boolean doNotUseThatUnmappedReadPile = false;
@ -56,7 +55,7 @@ public class BoundedReadIterator implements StingSAMIterator {
* @param iter
* @param readCount
*/
public BoundedReadIterator(CloseableIterator<SAMRecord> iter, long readCount) {
public BoundedReadIterator(StingSAMIterator iter, long readCount) {
if (iter != null) {
isOpen = true;

View File

@ -2,21 +2,18 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import java.util.Random;
import java.util.Iterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import java.util.Random;
public class DownsampleIterator implements Iterator<SAMRecord> {
public class DownsampleIterator implements StingSAMIterator {
Iterator<SAMRecord> it;
StingSAMIterator it;
Random generator;
int cutoff;
SAMRecord next;
public DownsampleIterator(Iterator<SAMRecord> it, double fraction) {
public DownsampleIterator(StingSAMIterator it, double fraction) {
this.it = it;
generator = new Random();
cutoff = (int)(fraction * 10000);
@ -46,4 +43,12 @@ public class DownsampleIterator implements Iterator<SAMRecord> {
return rec;
}
}
public void close() {
it.close();
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -271,7 +271,7 @@ public class MergingSamRecordIterator2 implements StingSAMIterator {
}
// Should replace picard class with the same name
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator> {
class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements Comparable<ComparableSamRecordIterator>, StingSAMIterator {
private final Comparator<SAMRecord> comparator;
private final SAMFileReader reader;
@ -319,4 +319,8 @@ class ComparableSamRecordIterator extends PeekableIterator<SAMRecord> implements
//System.out.printf("Comparing %s vs. %s => %d%n", record.getReadName(), record2.getReadName(), comparator.compare(record, record2));
return comparator.compare(record, record2);
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -8,7 +8,9 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
public class SortSamIterator implements Iterator<SAMRecord> {
// TODO: Deprecate?
// I don't think we need this if we're only allowing sorted and indexed BAM Files in the GATK - Aaron
public class SortSamIterator implements StingSAMIterator {
Iterator<ComparableSAMRecord> it;
@ -31,4 +33,12 @@ public class SortSamIterator implements Iterator<SAMRecord> {
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
public void close() {
// nothing to do right now
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.util.CloseableIterator;
/**
*
@ -24,9 +23,9 @@ import net.sf.samtools.util.CloseableIterator;
* @version 1.0
* @date May 6, 2009
* <p/>
* Interface ClosableGetHeaderIterator
* Interface StingSAMIterator
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
* This is the standard interface for all iterators in the Sting package that iterate over SAMRecords
*/
public interface StingSAMIterator extends CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
}

View File

@ -2,12 +2,10 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.RuntimeIOException;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
/**
* Created by IntelliJ IDEA.
* User: mdepristo
@ -15,13 +13,13 @@ import org.broadinstitute.sting.utils.Utils;
* Time: 6:02:31 PM
* To change this template use File | Settings | File Templates.
*/
public class VerifyingSamIterator implements Iterator<SAMRecord> {
Iterator<SAMRecord> it;
public class VerifyingSamIterator implements StingSAMIterator {
StingSAMIterator it;
SAMRecord last = null;
boolean checkOrderP = true;
long nOutOfOrderReads = 0;
public VerifyingSamIterator(Iterator<SAMRecord> it) {
public VerifyingSamIterator(StingSAMIterator it) {
this.it = it;
}
@ -64,4 +62,12 @@ public class VerifyingSamIterator implements Iterator<SAMRecord> {
public void remove() {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
public void close() {
it.close();
}
public Iterator<SAMRecord> iterator() {
return this;
}
}

View File

@ -378,9 +378,21 @@ public abstract class TraversalEngine {
}
}
protected Iterator<SAMRecord> WrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
Iterator<SAMRecord> wrappedIterator = rawIterator;
@Deprecated
protected StingSAMIterator WrapReadsIterator( final Iterator<SAMRecord> rawIterator, final boolean enableVerification ) {
StingSAMIterator wrappedIterator = StingSAMIteratorAdapter.adapt(rawIterator);
wrappedIterator = ApplyDecoratingIterators(enableVerification, wrappedIterator);
if (THREADED_IO) {
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
wrappedIterator = StingSAMIteratorAdapter.adapt(new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE));
}
return wrappedIterator;
}
protected StingSAMIterator ApplyDecoratingIterators(boolean enableVerification, StingSAMIterator wrappedIterator) {
// NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away
if (DOWNSAMPLE_BY_FRACTION)
@ -391,12 +403,6 @@ public abstract class TraversalEngine {
if (beSafeP && enableVerification)
wrappedIterator = new VerifyingSamIterator(wrappedIterator);
if (THREADED_IO) {
logger.info(String.format("Enabling threaded I/O with buffer of %d reads", THREADED_IO_BUFFER_SIZE));
wrappedIterator = new ThreadedIterator<SAMRecord>(wrappedIterator, THREADED_IO_BUFFER_SIZE);
}
return wrappedIterator;
}

View File

@ -3,9 +3,10 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.dataSources.shards.ReadShard;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -87,10 +88,11 @@ public class TraverseReads extends TraversalEngine {
ReadWalker<M, T> readWalker = (ReadWalker<M, T>) walker;
int readCNT = 0;
// we allow a bunch of wrapping iterators for down sampling, threadingIO, etc.
StingSAMIterator it = ApplyDecoratingIterators(true, dataProvider.getReadIterator());
// while we still have more reads
for (SAMRecord read : dataProvider.getReadIterator()) {
for (SAMRecord read : it) {
// our locus context
LocusContext locus = null;
@ -132,5 +134,5 @@ public class TraverseReads extends TraversalEngine {
*/
public <T> void printOnTraversalDone( T sum ) {
printOnTraversalDone( "reads", sum );
}
}
}

View File

@ -54,6 +54,7 @@ public class TrivialInstrumenter implements ClassFileTransformer {
byte[] classfileBuffer)
throws IllegalClassFormatException {
if (className.contains("broadinstitute") && !(className.endsWith("BaseTest"))) {
JavaClass jclas = null;
try {
jclas = Repository.lookupClass(className);
@ -66,10 +67,15 @@ public class TrivialInstrumenter implements ClassFileTransformer {
if (!(jclas.getSuperClass().getClassName().contains("BaseTest"))) {
return null;
}
System.err.println("looking at " + className);
ClassGen cgen = new ClassGen(jclas);
ConstantPoolGen pgen = cgen.getConstantPool();
InstructionFactory fact = new InstructionFactory(cgen, pgen);
createFields(cgen, pgen);
/*for (Method m : cgen.getMethods()) {
System.err.println("looking at " + m.getName());
addStringOutputToMethod(jclas, cgen, pgen, m, fact);
}*/
createBeforeMethod(cgen, pgen, fact);
createAfterMethod(cgen, pgen, fact);
@ -90,8 +96,116 @@ public class TrivialInstrumenter implements ClassFileTransformer {
field = new FieldGen(Constants.ACC_PRIVATE | Constants.ACC_STATIC, Type.LONG, "startTime", pgen);
cgen.addField(field.getField());
field = new FieldGen(Constants.ACC_PRIVATE, Type.STRING, "currentTestName", pgen);
cgen.addField(field.getField());
}
/*
private void addStringOutputToMethod(JavaClass classname, ClassGen cgen, ConstantPoolGen pgen, Method meth, InstructionFactory fact) {
if(true) {return;}
if (meth.getName().contains("<")) {
System.err.println("Nope -> " + meth.getName());
return;
}
//if (meth.isPublic()) {
boolean outputInstead = true;
MethodGen g = new MethodGen(meth, cgen.getClassName(), pgen);
InstructionList il = g.getInstructionList();
//if (outputInstead) {
BufferedWriter outputStream = null;
BufferedWriter outputStream2 = null;
//}
Instruction returnInstruction = null;
InstructionHandle[] iHandles = il.getInstructionHandles();
for (int f = 0; f < iHandles.length; f++) {
if (iHandles[f].getInstruction() instanceof ReturnInstruction) {
returnInstruction = iHandles[f].getInstruction();
//System.out.println("found the invoke virtual");
break;
}
}
if (outputInstead) {
try {
outputStream =
new BufferedWriter(new FileWriter("one.txt"));
outputStream2 =
new BufferedWriter(new FileWriter("two.txt"));
outputStream.write(meth.getName() + " of " + meth.getClass());
for (Instruction i : il.getInstructions()) {
outputStream.write(i.getName() + " <code> " + i.getOpcode() + " <toString> " + i.toString() + "\n");
}
outputStream.close();
}
catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
}
//InstructionHandle handle = il.getEnd();
//il.s
//il.insert(getFieldInstruction, fact.createLoad(Type.OBJECT, 0));
//il.insert(getFieldInstruction, fact.createNew("java.lang.String"));
//il.insert(getFieldInstruction, InstructionConstants.DUP);
//il.insert(getFieldInstruction, new PUSH(pgen, meth.getName()));
//il.insert(getFieldInstruction, fact.createInvoke("java.lang.String", "<init>", Type.VOID, new Type[]{Type.STRING}, Constants.INVOKESPECIAL));
//il.insert(getFieldInstruction, fact.createFieldAccess(cgen.getClassName(), "currentTestName", Type.STRING, Constants.PUTFIELD));
//il.insert(getFieldInstruction,fact.createPrintln("Hello World"));
/*il.insert(returnInstruction, new ALOAD(0));
il.insert(returnInstruction, fact.createNew("java.lang.String"));
il.insert(returnInstruction, InstructionConstants.DUP);
il.insert(returnInstruction, new PUSH(pgen, meth.getName()));
il.insert(returnInstruction, fact.createInvoke("java.lang.String", "<init>", Type.VOID, new Type[]{Type.STRING}, Constants.INVOKESPECIAL));
il.insert(returnInstruction, fact.createFieldAccess(classname.replace("/","."), "currentTestName", Type.STRING, Constants.PUTFIELD));*/
/*il.setPositions();
g.setMaxStack();
g.setMaxLocals();
g.removeLineNumbers();
//org.apache.bcel.classfile.LocalVariableTypeTable table;
InstructionList inst = g.getInstructionList();
if (outputInstead) {
try {
outputStream2.write(meth.getName() + " of " + meth.getClass() + " classname: " + classname.getClassName() + "\n");
for (Instruction i : inst.getInstructions()) {
outputStream2.write(i.getName() + " <code> " + i.getOpcode() + " <toString> " + i.toString() + "\n");
}
outputStream2.close();
}
catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
}
//.
cgen.replaceMethod(meth, g.getMethod());
il.dispose();*/
//}
/*if (meth.isPublic()) {
InstructionList il = new InstructionList();
MethodGen method = new MethodGen(Constants.ACC_PUBLIC, Type.VOID, Type.NO_ARGS, new String[]{}, meth.getName(), cgen.getClassName(), il, pgen);
InstructionHandle ih_0 = il.append(fact.createLoad(Type.OBJECT, 0));
il.append(fact.createNew("java.lang.String"));
il.append(InstructionConstants.DUP);
il.append(new PUSH(pgen, "grrr"));
il.append(fact.createInvoke("java.lang.String", "<init>", Type.VOID, new Type[]{Type.STRING}, Constants.INVOKESPECIAL));
il.append(fact.createFieldAccess(cgen.getClassName(), "currentTestName", Type.STRING, Constants.PUTFIELD));
InstructionHandle ih_13 = il.append(fact.createReturn(Type.VOID));
method.setMaxStack();
method.setMaxLocals();
cgen.removeMethod(meth);
cgen.addMethod(method.getMethod());
il.dispose();
}
}*/
/**
* create the before method
*
@ -99,6 +213,7 @@ public class TrivialInstrumenter implements ClassFileTransformer {
* @param pgen our constant pool generator
* @param fact the instruction factory we're using
*/
private void createBeforeMethod(ClassGen cgen, ConstantPoolGen pgen, InstructionFactory fact) {
InstructionList il = new InstructionList();
MethodGen method = new MethodGen(Constants.ACC_PUBLIC | Constants.ACC_FINAL, Type.VOID, Type.NO_ARGS, new String[]{}, "baseSetup", cgen.getClassName(), il, pgen);
@ -127,7 +242,17 @@ public class TrivialInstrumenter implements ClassFileTransformer {
InstructionHandle ih_0 = il.append(fact.createInvoke("java.lang.System", "currentTimeMillis", Type.LONG, Type.NO_ARGS, Constants.INVOKESTATIC));
il.append(fact.createStore(Type.LONG, 1));
InstructionHandle ih_4 = il.append(fact.createFieldAccess(cgen.getClassName(), "logger", new ObjectType("org.apache.log4j.Logger"), Constants.GETSTATIC));
il.append(new PUSH(pgen, cgen.getClassName() + " runtime: %dms"));
il.append(fact.createNew("java.lang.StringBuilder"));
il.append(InstructionConstants.DUP);
il.append(fact.createInvoke("java.lang.StringBuilder", "<init>", Type.VOID, Type.NO_ARGS, Constants.INVOKESPECIAL));
il.append(new PUSH(pgen, "Test Name: "));
il.append(fact.createInvoke("java.lang.StringBuilder", "append", new ObjectType("java.lang.StringBuilder"), new Type[]{Type.STRING}, Constants.INVOKEVIRTUAL));
il.append(fact.createLoad(Type.OBJECT, 0));
il.append(fact.createFieldAccess(cgen.getClassName(), "currentTestName", Type.STRING, Constants.GETFIELD));
il.append(fact.createInvoke("java.lang.StringBuilder", "append", new ObjectType("java.lang.StringBuilder"), new Type[]{Type.STRING}, Constants.INVOKEVIRTUAL));
il.append(new PUSH(pgen, " runtime: %dms"));
il.append(fact.createInvoke("java.lang.StringBuilder", "append", new ObjectType("java.lang.StringBuilder"), new Type[]{Type.STRING}, Constants.INVOKEVIRTUAL));
il.append(fact.createInvoke("java.lang.StringBuilder", "toString", Type.STRING, Type.NO_ARGS, Constants.INVOKEVIRTUAL));
il.append(new PUSH(pgen, 1));
il.append(fact.createNewArray(Type.OBJECT, (short) 1));
il.append(InstructionConstants.DUP);
@ -139,7 +264,7 @@ public class TrivialInstrumenter implements ClassFileTransformer {
il.append(InstructionConstants.AASTORE);
il.append(fact.createInvoke("java.lang.String", "format", Type.STRING, new Type[]{Type.STRING, new ArrayType(Type.OBJECT, 1)}, Constants.INVOKESTATIC));
il.append(fact.createInvoke("org.apache.log4j.Logger", "warn", Type.VOID, new Type[]{Type.OBJECT}, Constants.INVOKEVIRTUAL));
InstructionHandle ih_30 = il.append(fact.createReturn(Type.VOID));
InstructionHandle ih_55 = il.append(fact.createReturn(Type.VOID));
method.setMaxStack();
method.setMaxLocals();
cgen.addMethod(method.getMethod());

View File

@ -1,17 +1,14 @@
package org.broadinstitute.sting.gatk.traversals;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.dataSources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
@ -62,8 +59,8 @@ public class TraverseReadsTest extends BaseTest {
private List<File> bamList;
private Walker countReadWalker;
private File output;
private static long readSize = 100000;
TraverseReads traversalEngine = null;
private long readSize = 100000;
private TraverseReads traversalEngine = null;
/**
* This function does the setup of our parser, before each method call.
@ -118,11 +115,6 @@ public class TraverseReadsTest extends BaseTest {
catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
GenomeLoc.setupRefContigOrdering(ref);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
@ -179,11 +171,6 @@ public class TraverseReadsTest extends BaseTest {
catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
GenomeLoc.setupRefContigOrdering(ref);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,