Added TraverseReads test, some bug fixes discovered in the traversal test
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@594 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
daa2163ee8
commit
f5880109a7
|
|
@ -189,7 +189,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
|
||||
bound = fastMappedReadSeek(shard.getSize(), iter);
|
||||
}
|
||||
if (bound == null || intoUnmappedReads) {
|
||||
if ((bound == null || intoUnmappedReads) && includeUnmappedReads) {
|
||||
if (iter != null) {
|
||||
iter.close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,11 +89,16 @@ public class TraverseReads extends TraversalEngine {
|
|||
// while we still have more reads
|
||||
for (SAMRecord read : iter) {
|
||||
|
||||
// get the genome loc from the read
|
||||
GenomeLoc site = new GenomeLoc(read);
|
||||
// our locus context
|
||||
LocusContext locus = null;
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
LocusContext locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));
|
||||
if (read.getReferenceIndex() >= 0) {
|
||||
// get the genome loc from the read
|
||||
GenomeLoc site = new GenomeLoc(read);
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));
|
||||
}
|
||||
|
||||
// update the number of reads we've seen
|
||||
TraversalStatistics.nRecords++;
|
||||
|
|
@ -108,9 +113,9 @@ public class TraverseReads extends TraversalEngine {
|
|||
sum = readWalker.reduce(x, sum);
|
||||
}
|
||||
|
||||
printProgress("loci", locus.getLocation());
|
||||
if (locus != null) { printProgress("loci", locus.getLocation()); }
|
||||
}
|
||||
System.err.println(TraversalStatistics.nRecords);
|
||||
//System.err.println(TraversalStatistics.nRecords);
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,270 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
|
||||
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
|
||||
import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: Apr 24, 2009
|
||||
* Time: 3:42:16 PM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date Apr 24, 2009
|
||||
* <p/>
|
||||
* Class TraverseReadsTest
|
||||
* <p/>
|
||||
* test traversing reads
|
||||
*/
|
||||
public class TraverseReadsTest extends BaseTest {
|
||||
|
||||
private FastaSequenceFile2 seq;
|
||||
private File bam = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"); // TCGA-06-0188.aligned.duplicates_marked.bam");
|
||||
private File refFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta");
|
||||
private List<File> bamList;
|
||||
private Walker countReadWalker;
|
||||
private File output;
|
||||
private static long readSize = 100000;
|
||||
TraverseReads traversalEngine = null;
|
||||
|
||||
/**
|
||||
* This function does the setup of our parser, before each method call.
|
||||
* <p/>
|
||||
* Called before every test case method.
|
||||
*/
|
||||
@Before
|
||||
public void doForEachTest() {
|
||||
output = new File("testOut.txt");
|
||||
FileOutputStream out = null;
|
||||
PrintStream ps; // declare a print stream object
|
||||
|
||||
try {
|
||||
out = new FileOutputStream(output);
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
fail("Couldn't open the output file");
|
||||
}
|
||||
|
||||
// Connect print stream to the output stream
|
||||
ps = new PrintStream(out);
|
||||
bamList = new ArrayList<File>();
|
||||
bamList.add(bam);
|
||||
countReadWalker = new CountReadsWalker();
|
||||
try {
|
||||
Field f = Walker.class.getDeclaredField("out");
|
||||
f.setAccessible(true);
|
||||
f.set(countReadWalker, ps);
|
||||
|
||||
} catch (IllegalAccessException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
} catch (NoSuchFieldException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
fail("Couldn't set the walkers printstream");
|
||||
}
|
||||
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
||||
|
||||
traversalEngine = new TraverseReads(bamList, refFile, rods);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testMappedReadCount() {
|
||||
|
||||
IndexedFastaSequenceFile ref = null;
|
||||
try {
|
||||
ref = new IndexedFastaSequenceFile(refFile);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||
}
|
||||
try {
|
||||
Thread.sleep(5000);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
}
|
||||
GenomeLoc.setupRefContigOrdering(ref);
|
||||
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
SAMDataSource dataSource = null;
|
||||
try {
|
||||
dataSource = new SAMDataSource(TraversalEngine.unpackReads(bamList));
|
||||
dataSource.viewUnmappedReads(true);
|
||||
//dataSource.viewUnmappedReads(false);
|
||||
}
|
||||
catch (SimpleDataSourceLoadException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
dataSource.viewUnmappedReads(false);
|
||||
|
||||
boolean walkerInitialized = false;
|
||||
Object accumulator = null;
|
||||
while (shardStrategy.hasNext()) {
|
||||
Shard shard = shardStrategy.next();
|
||||
BoundedReadIterator readIter = null;
|
||||
try {
|
||||
readIter = (BoundedReadIterator)dataSource.seek(shard);
|
||||
}
|
||||
catch (SimpleDataSourceLoadException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
//LocusContextProvider locusProvider = new LocusContextProvider( readIter );
|
||||
|
||||
// set the sam header of the traversal engine
|
||||
traversalEngine.setSAMHeader(readIter.getMergedHeader());
|
||||
|
||||
if (!walkerInitialized) {
|
||||
countReadWalker.initialize();
|
||||
accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit();
|
||||
walkerInitialized = true;
|
||||
|
||||
}
|
||||
if (shard == null) {
|
||||
fail("Shard == null");
|
||||
}
|
||||
|
||||
|
||||
accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator);
|
||||
readIter.close();
|
||||
|
||||
}
|
||||
|
||||
traversalEngine.printOnTraversalDone("loci", accumulator);
|
||||
countReadWalker.onTraversalDone(accumulator);
|
||||
|
||||
if (!(accumulator instanceof Integer)) {
|
||||
fail("Count read walker should return an interger.");
|
||||
}
|
||||
if (((Integer)accumulator) != 9721) {
|
||||
fail("there should be 9721 mapped reads in the index file");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
|
||||
IndexedFastaSequenceFile ref = null;
|
||||
try {
|
||||
ref = new IndexedFastaSequenceFile(refFile);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||
}
|
||||
try {
|
||||
Thread.sleep(5000);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
||||
}
|
||||
GenomeLoc.setupRefContigOrdering(ref);
|
||||
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
ref.getSequenceDictionary(),
|
||||
readSize);
|
||||
SAMDataSource dataSource = null;
|
||||
try {
|
||||
dataSource = new SAMDataSource(TraversalEngine.unpackReads(bamList));
|
||||
dataSource.viewUnmappedReads(true);
|
||||
//dataSource.viewUnmappedReads(false);
|
||||
}
|
||||
catch (SimpleDataSourceLoadException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
dataSource.viewUnmappedReads(true);
|
||||
|
||||
boolean walkerInitialized = false;
|
||||
Object accumulator = null;
|
||||
while (shardStrategy.hasNext()) {
|
||||
Shard shard = shardStrategy.next();
|
||||
BoundedReadIterator readIter = null;
|
||||
try {
|
||||
readIter = (BoundedReadIterator)dataSource.seek(shard);
|
||||
}
|
||||
catch (SimpleDataSourceLoadException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
//LocusContextProvider locusProvider = new LocusContextProvider( readIter );
|
||||
|
||||
// set the sam header of the traversal engine
|
||||
traversalEngine.setSAMHeader(readIter.getMergedHeader());
|
||||
|
||||
if (!walkerInitialized) {
|
||||
countReadWalker.initialize();
|
||||
accumulator = ((ReadWalker<?, ?>) countReadWalker).reduceInit();
|
||||
walkerInitialized = true;
|
||||
|
||||
}
|
||||
if (shard == null) {
|
||||
fail("Shard == null");
|
||||
}
|
||||
|
||||
|
||||
accumulator = traversalEngine.traverse(countReadWalker, shard, readIter, accumulator);
|
||||
readIter.close();
|
||||
|
||||
}
|
||||
|
||||
traversalEngine.printOnTraversalDone("loci", accumulator);
|
||||
countReadWalker.onTraversalDone(accumulator);
|
||||
|
||||
if (!(accumulator instanceof Integer)) {
|
||||
fail("Count read walker should return an interger.");
|
||||
}
|
||||
if (((Integer)accumulator) != 10000) {
|
||||
fail("there should be 9721 mapped reads in the index file");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue