2009-05-06 04:36:00 +08:00
|
|
|
package org.broadinstitute.sting.gatk.traversals;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.BaseTest;
|
2009-06-22 22:39:41 +08:00
|
|
|
import org.broadinstitute.sting.gatk.Reads;
|
2009-06-12 02:13:22 +08:00
|
|
|
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
|
|
|
|
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
|
|
|
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
|
|
|
|
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
|
|
|
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
2009-05-06 04:36:00 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|
|
|
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
|
|
|
|
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
2009-06-22 22:39:41 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
2009-05-06 04:36:00 +08:00
|
|
|
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
|
|
|
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
|
|
|
import static org.junit.Assert.fail;
|
|
|
|
|
import org.junit.Before;
|
|
|
|
|
import org.junit.Test;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.FileNotFoundException;
|
|
|
|
|
import java.io.FileOutputStream;
|
|
|
|
|
import java.io.PrintStream;
|
|
|
|
|
import java.lang.reflect.Field;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
*
|
|
|
|
|
* User: aaron
|
|
|
|
|
* Date: Apr 24, 2009
|
|
|
|
|
* Time: 3:42:16 PM
|
|
|
|
|
*
|
|
|
|
|
* The Broad Institute
|
|
|
|
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
|
|
|
|
* This software and its documentation are copyright 2009 by the
|
|
|
|
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
|
|
|
|
*
|
|
|
|
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
|
|
|
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Class TraverseReadsTest.
 * <p>
 * Tests traversing reads: the BAM file is sharded by reads, every shard is run through
 * the read traversal engine with a read-counting walker, and the final count is checked.
 * <p>
 * Date: Apr 24, 2009
 *
 * @author aaron
 * @version 1.0
 */
|
|
|
|
|
public class TraverseReadsTest extends BaseTest {
|
|
|
|
|
|
|
|
|
|
private FastaSequenceFile2 seq;
|
|
|
|
|
private File bam = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test.bam"); // TCGA-06-0188.aligned.duplicates_marked.bam");
|
|
|
|
|
private File refFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/Homo_sapiens_assembly17.fasta");
|
|
|
|
|
private List<File> bamList;
|
|
|
|
|
private Walker countReadWalker;
|
|
|
|
|
private File output;
|
2009-05-15 00:52:18 +08:00
|
|
|
private long readSize = 100000;
|
|
|
|
|
private TraverseReads traversalEngine = null;
|
2009-05-06 04:36:00 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* This function does the setup of our parser, before each method call.
|
|
|
|
|
* <p/>
|
|
|
|
|
* Called before every test case method.
|
|
|
|
|
*/
|
|
|
|
|
@Before
|
|
|
|
|
public void doForEachTest() {
|
|
|
|
|
output = new File("testOut.txt");
|
|
|
|
|
FileOutputStream out = null;
|
|
|
|
|
PrintStream ps; // declare a print stream object
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
out = new FileOutputStream(output);
|
|
|
|
|
} catch (FileNotFoundException e) {
|
|
|
|
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
|
|
|
|
fail("Couldn't open the output file");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Connect print stream to the output stream
|
|
|
|
|
ps = new PrintStream(out);
|
|
|
|
|
bamList = new ArrayList<File>();
|
|
|
|
|
bamList.add(bam);
|
|
|
|
|
countReadWalker = new CountReadsWalker();
|
|
|
|
|
try {
|
|
|
|
|
Field f = Walker.class.getDeclaredField("out");
|
|
|
|
|
f.setAccessible(true);
|
|
|
|
|
f.set(countReadWalker, ps);
|
|
|
|
|
|
|
|
|
|
} catch (IllegalAccessException e) {
|
|
|
|
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
|
|
|
|
} catch (NoSuchFieldException e) {
|
|
|
|
|
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
|
|
|
|
fail("Couldn't set the walkers printstream");
|
|
|
|
|
}
|
|
|
|
|
List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum>>();
|
|
|
|
|
|
|
|
|
|
traversalEngine = new TraverseReads(bamList, refFile, rods);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Test out that we can shard the file and iterate over every read */
|
|
|
|
|
@Test
|
|
|
|
|
public void testMappedReadCount() {
|
|
|
|
|
|
|
|
|
|
IndexedFastaSequenceFile ref = null;
|
|
|
|
|
try {
|
|
|
|
|
ref = new IndexedFastaSequenceFile(refFile);
|
|
|
|
|
}
|
|
|
|
|
catch (FileNotFoundException ex) {
|
|
|
|
|
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
|
|
|
|
}
|
2009-06-22 22:39:41 +08:00
|
|
|
GenomeLocParser.setupRefContigOrdering(ref);
|
2009-05-06 04:36:00 +08:00
|
|
|
|
|
|
|
|
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
|
|
|
|
ref.getSequenceDictionary(),
|
|
|
|
|
readSize);
|
2009-05-09 05:27:54 +08:00
|
|
|
|
2009-06-05 06:37:51 +08:00
|
|
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
|
2009-05-06 04:36:00 +08:00
|
|
|
dataSource.viewUnmappedReads(false);
|
|
|
|
|
|
2009-05-09 05:27:54 +08:00
|
|
|
countReadWalker.initialize();
|
|
|
|
|
Object accumulator = countReadWalker.reduceInit();
|
|
|
|
|
|
2009-05-06 04:36:00 +08:00
|
|
|
while (shardStrategy.hasNext()) {
|
|
|
|
|
Shard shard = shardStrategy.next();
|
|
|
|
|
|
2009-05-07 05:40:41 +08:00
|
|
|
if (shard == null) {
|
|
|
|
|
fail("Shard == null");
|
|
|
|
|
}
|
2009-05-06 04:36:00 +08:00
|
|
|
|
2009-05-22 04:09:32 +08:00
|
|
|
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null);
|
2009-05-09 05:27:54 +08:00
|
|
|
accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator);
|
|
|
|
|
dataProvider.close();
|
2009-05-06 04:36:00 +08:00
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
traversalEngine.printOnTraversalDone("loci", accumulator);
|
|
|
|
|
countReadWalker.onTraversalDone(accumulator);
|
|
|
|
|
|
|
|
|
|
if (!(accumulator instanceof Integer)) {
|
|
|
|
|
fail("Count read walker should return an interger.");
|
|
|
|
|
}
|
2009-05-07 05:40:41 +08:00
|
|
|
if (((Integer) accumulator) != 9721) {
|
2009-06-22 22:39:41 +08:00
|
|
|
fail("there should be 9721 mapped reads in the index file, there was " + ((Integer) accumulator) );
|
2009-05-06 04:36:00 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Test out that we can shard the file and iterate over every read */
|
|
|
|
|
@Test
|
|
|
|
|
public void testUnmappedReadCount() {
|
|
|
|
|
IndexedFastaSequenceFile ref = null;
|
|
|
|
|
try {
|
|
|
|
|
ref = new IndexedFastaSequenceFile(refFile);
|
|
|
|
|
}
|
|
|
|
|
catch (FileNotFoundException ex) {
|
|
|
|
|
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
|
|
|
|
}
|
2009-06-22 22:39:41 +08:00
|
|
|
GenomeLocParser.setupRefContigOrdering(ref);
|
2009-05-06 04:36:00 +08:00
|
|
|
|
|
|
|
|
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
|
|
|
|
ref.getSequenceDictionary(),
|
|
|
|
|
readSize);
|
|
|
|
|
|
2009-06-05 06:37:51 +08:00
|
|
|
SAMDataSource dataSource = new SAMDataSource(new Reads(bamList),true);
|
2009-05-06 04:36:00 +08:00
|
|
|
dataSource.viewUnmappedReads(true);
|
|
|
|
|
|
2009-05-09 05:27:54 +08:00
|
|
|
countReadWalker.initialize();
|
|
|
|
|
Object accumulator = countReadWalker.reduceInit();
|
|
|
|
|
|
2009-05-06 04:36:00 +08:00
|
|
|
while (shardStrategy.hasNext()) {
|
|
|
|
|
Shard shard = shardStrategy.next();
|
|
|
|
|
|
2009-05-07 05:40:41 +08:00
|
|
|
if (shard == null) {
|
|
|
|
|
fail("Shard == null");
|
|
|
|
|
}
|
2009-05-06 04:36:00 +08:00
|
|
|
|
2009-05-22 04:09:32 +08:00
|
|
|
ShardDataProvider dataProvider = new ShardDataProvider(shard,dataSource,null,null);
|
2009-05-09 05:27:54 +08:00
|
|
|
accumulator = traversalEngine.traverse(countReadWalker, shard, dataProvider, accumulator);
|
|
|
|
|
dataProvider.close();
|
2009-05-06 04:36:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
traversalEngine.printOnTraversalDone("loci", accumulator);
|
|
|
|
|
countReadWalker.onTraversalDone(accumulator);
|
|
|
|
|
|
|
|
|
|
if (!(accumulator instanceof Integer)) {
|
|
|
|
|
fail("Count read walker should return an interger.");
|
|
|
|
|
}
|
2009-05-07 05:40:41 +08:00
|
|
|
if (((Integer) accumulator) != 10000) {
|
2009-06-22 22:39:41 +08:00
|
|
|
fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator));
|
2009-05-06 04:36:00 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|