Added TraverseReads test, some bug fixes discovered in the traversal test

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@594 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-05 20:36:00 +00:00
parent daa2163ee8
commit f5880109a7
3 changed files with 282 additions and 7 deletions

View File

@ -189,7 +189,7 @@ public class SAMDataSource implements SimpleDataSource {
bound = fastMappedReadSeek(shard.getSize(), iter);
}
if (bound == null || intoUnmappedReads) {
if ((bound == null || intoUnmappedReads) && includeUnmappedReads) {
if (iter != null) {
iter.close();
}

View File

@ -89,11 +89,16 @@ public class TraverseReads extends TraversalEngine {
// while we still have more reads
for (SAMRecord read : iter) {
// get the genome loc from the read
GenomeLoc site = new GenomeLoc(read);
// our locus context
LocusContext locus = null;
// Jump forward in the reference to this locus location
LocusContext locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));
if (read.getReferenceIndex() >= 0) {
// get the genome loc from the read
GenomeLoc site = new GenomeLoc(read);
// Jump forward in the reference to this locus location
locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));
}
// update the number of reads we've seen
TraversalStatistics.nRecords++;
@ -108,9 +113,9 @@ public class TraverseReads extends TraversalEngine {
sum = readWalker.reduce(x, sum);
}
printProgress("loci", locus.getLocation());
if (locus != null) { printProgress("loci", locus.getLocation()); }
}
System.err.println(TraversalStatistics.nRecords);
//System.err.println(TraversalStatistics.nRecords);
return sum;
}

View File

@ -0,0 +1,270 @@
package org.broadinstitute.sting.gatk.traversals;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import static org.junit.Assert.fail;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
/**
*
* User: aaron
* Date: Apr 24, 2009
* Time: 3:42:16 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 24, 2009
* <p/>
* Class TraverseReadsTest
* <p/>
* test traversing reads
*/
public class TraverseReadsTest extends BaseTest {
private FastaSequenceFile2 seq;
private File bam = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"); // TCGA-06-0188.aligned.duplicates_marked.bam");
private File refFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta");
private List<File> bamList;
private Walker countReadWalker;
private File output;
private static long readSize = 100000;
TraverseReads traversalEngine = null;
/**
* This function does the setup of our parser, before each method call.
* <p/>
* Called before every test case method.
*/
@Before
public void doForEachTest() {
output = new File("testOut.txt");
FileOutputStream out = null;
PrintStream ps; // declare a print stream object
try {
out = new FileOutputStream(output);
} catch (FileNotFoundException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("Couldn't open the output file");
}
// Connect print stream to the output stream
ps = new PrintStream(out);
bamList = new ArrayList<File>();
bamList.add(bam);
countReadWalker = new CountReadsWalker();
try {
Field f = Walker.class.getDeclaredField("out");
f.setAccessible(true);
f.set(countReadWalker, ps);
} catch (IllegalAccessException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
} catch (NoSuchFieldException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("Couldn't set the walkers printstream");
}
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
traversalEngine = new TraverseReads(bamList, refFile, rods);
}
/** Test out that we can shard the file and iterate over every read */
@Test
public void testMappedReadCount() {
IndexedFastaSequenceFile ref = null;
try {
ref = new IndexedFastaSequenceFile(refFile);
}
catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
GenomeLoc.setupRefContigOrdering(ref);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
ref.getSequenceDictionary(),
readSize);
SAMDataSource dataSource = null;
try {
dataSource = new SAMDataSource(TraversalEngine.unpackReads(bamList));
dataSource.viewUnmappedReads(true);
//dataSource.viewUnmappedReads(false);
}
catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex);
}
catch (FileNotFoundException ex) {
throw new RuntimeException(ex);
}
dataSource.viewUnmappedReads(false);
boolean walkerInitialized = false;
Object accumulator = null;
while (shardStrategy.hasNext()) {
Shard shard = shardStrategy.next();
BoundedReadIterator readIter = null;
try {
readIter = (BoundedReadIterator)dataSource.seek(shard);
}
catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex);
}
//LocusContextProvider locusProvider = new LocusContextProvider( readIter );
// set the sam header of the traversal engine
traversalEngine.setSAMHeader(readIter.getMergedHeader());
if (!walkerInitialized) {
countReadWalker.initialize();
accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit();
walkerInitialized = true;
}
if (shard == null) {
fail("Shard == null");
}
accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator);
readIter.close();
}
traversalEngine.printOnTraversalDone("loci", accumulator);
countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) {
fail("Count read walker should return an interger.");
}
if (((Integer)accumulator) != 9721) {
fail("there should be 9721 mapped reads in the index file");
}
}
/** Test out that we can shard the file and iterate over every read */
@Test
public void testUnmappedReadCount() {
IndexedFastaSequenceFile ref = null;
try {
ref = new IndexedFastaSequenceFile(refFile);
}
catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
GenomeLoc.setupRefContigOrdering(ref);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
ref.getSequenceDictionary(),
readSize);
SAMDataSource dataSource = null;
try {
dataSource = new SAMDataSource(TraversalEngine.unpackReads(bamList));
dataSource.viewUnmappedReads(true);
//dataSource.viewUnmappedReads(false);
}
catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex);
}
catch (FileNotFoundException ex) {
throw new RuntimeException(ex);
}
dataSource.viewUnmappedReads(true);
boolean walkerInitialized = false;
Object accumulator = null;
while (shardStrategy.hasNext()) {
Shard shard = shardStrategy.next();
BoundedReadIterator readIter = null;
try {
readIter = (BoundedReadIterator)dataSource.seek(shard);
}
catch (SimpleDataSourceLoadException ex) {
throw new RuntimeException(ex);
}
//LocusContextProvider locusProvider = new LocusContextProvider( readIter );
// set the sam header of the traversal engine
traversalEngine.setSAMHeader(readIter.getMergedHeader());
if (!walkerInitialized) {
countReadWalker.initialize();
accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit();
walkerInitialized = true;
}
if (shard == null) {
fail("Shard == null");
}
accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator);
readIter.close();
}
traversalEngine.printOnTraversalDone("loci", accumulator);
countReadWalker.onTraversalDone(accumulator);
if (!(accumulator instanceof Integer)) {
fail("Count read walker should return an interger.");
}
if (((Integer)accumulator) != 10000) {
fail("there should be 9721 mapped reads in the index file");
}
}
}