Ported the small BAM performance test suite to the Google Caliper microbenchmarking suite. Looks promising,
but I'm still not sure that Google Caliper (GC) is a good long-term solution.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5683 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
00b57c751b
commit
57a4700299
5
ivy.xml
5
ivy.xml
|
|
@ -46,7 +46,7 @@
|
|||
<!-- Dependencies for LSF library -->
|
||||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||
|
||||
<!-- Dependencies for LSF library -->
|
||||
<!-- Dependencies for amazon.com S3 support -->
|
||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
||||
|
||||
<!-- Scala dependencies -->
|
||||
|
|
@ -60,6 +60,9 @@
|
|||
<dependency org="net.sourceforge.findbugs" name="annotations" rev="1.3.2" conf="default"/>
|
||||
<dependency org="net.sourceforge.findbugs" name="jsr305" rev="1.3.2" conf="default"/>
|
||||
|
||||
<!-- caliper, for benchmarking -->
|
||||
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="default" />
|
||||
|
||||
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
|
||||
<exclude org="javax.servlet" />
|
||||
<exclude org="javax.jms" />
|
||||
|
|
|
|||
|
|
@ -1,143 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Basic suite for testing idealized and actual performance of read processing.
|
||||
*/
|
||||
public class BAMProcessingPerformanceMeter extends CommandLineProgram {
|
||||
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = true)
|
||||
File samFile;
|
||||
|
||||
@Input(fullName = "reference_file", shortName="R", doc = "Associated FASTA sequence", required = true)
|
||||
File referenceFile;
|
||||
|
||||
@Argument(fullName="test_repetitions", shortName = "test_reps", doc="Number of times to repeat each test", required = false)
|
||||
int testRepetitions = 5;
|
||||
|
||||
@Argument(fullName="print_frequency", shortName = "pf", doc="Print cumulative time after x # reads", required = false)
|
||||
int printFrequency = 100000;
|
||||
|
||||
private void testBAMFileProcessingThroughput(ReadProcessor readProcessor) {
|
||||
readProcessor.execute(samFile,referenceFile);
|
||||
}
|
||||
|
||||
public int execute() {
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new NoAdditionalProcessing(this));
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverEachBase(this));
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverCigarString(this));
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new ExtractTag(this,"OQ"));
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeSamLocusIterator(this));
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, GATKArgumentCollection.getDefaultDownsamplingMethod()));
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, DownsamplingMethod.NONE));
|
||||
GATKWalkerInvoker countReadsInvoker = new GATKWalkerInvoker(this);
|
||||
CountReadsPerformanceWalker countReadsWalker = new CountReadsPerformanceWalker(countReadsInvoker);
|
||||
countReadsInvoker.setWalker(countReadsWalker);
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countReadsInvoker);
|
||||
|
||||
GATKWalkerInvoker countBasesInReadInvoker = new GATKWalkerInvoker(this);
|
||||
CountBasesInReadPerformanceWalker countBasesInReadWalker = new CountBasesInReadPerformanceWalker(countBasesInReadInvoker);
|
||||
countBasesInReadInvoker.setWalker(countBasesInReadWalker);
|
||||
for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countBasesInReadInvoker);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Required main method implementation.
|
||||
* @param argv Command-line argument text.
|
||||
* @throws Exception on error.
|
||||
*/
|
||||
public static void main(String[] argv) throws Exception {
|
||||
int returnCode = 0;
|
||||
try {
|
||||
BAMProcessingPerformanceMeter instance = new BAMProcessingPerformanceMeter();
|
||||
start(instance, argv);
|
||||
returnCode = 0;
|
||||
}
|
||||
catch(Exception ex) {
|
||||
returnCode = 1;
|
||||
ex.printStackTrace();
|
||||
throw ex;
|
||||
}
|
||||
finally {
|
||||
System.exit(returnCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
abstract class ReadProcessor {
|
||||
private final SimpleTimer timer;
|
||||
private final int printFrequency;
|
||||
protected int iterations = 0;
|
||||
|
||||
public ReadProcessor(BAMProcessingPerformanceMeter performanceMeter) {
|
||||
timer = new SimpleTimer("timer");
|
||||
this.printFrequency = performanceMeter.printFrequency;
|
||||
}
|
||||
|
||||
public abstract String getTestName();
|
||||
public String getIterationType() { return "loci"; }
|
||||
|
||||
public void processRead(final SAMRecord read) { }
|
||||
public void execute(File bamFile,File fastaFile) {
|
||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||
startTest();
|
||||
for(SAMRecord read: reader) {
|
||||
processRead(read);
|
||||
updateIterationCount();
|
||||
}
|
||||
stopTest();
|
||||
reader.close();
|
||||
}
|
||||
|
||||
protected void startTest() {
|
||||
timer.start();
|
||||
}
|
||||
|
||||
protected void stopTest() {
|
||||
timer.stop();
|
||||
printStatus("TEST COMPLETE");
|
||||
}
|
||||
|
||||
protected void updateIterationCount() {
|
||||
if(++iterations % printFrequency == 0) printStatus("ONGOING");
|
||||
}
|
||||
|
||||
private void printStatus(String prefix) {
|
||||
System.out.printf("%s: %s printed %d %s in %f seconds.%n",prefix,getTestName(),iterations,getIterationType(),timer.getElapsedTime());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:55 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
|
||||
private long As;
|
||||
private long Cs;
|
||||
private long Gs;
|
||||
private long Ts;
|
||||
|
||||
private final GATKWalkerInvoker invoker;
|
||||
|
||||
public CountBasesInReadPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
|
||||
this.invoker = walkerInvoker;
|
||||
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
for(byte base: read.getReadBases()) {
|
||||
switch(base) {
|
||||
case 'A': As++; break;
|
||||
case 'C': Cs++; break;
|
||||
case 'G': Gs++; break;
|
||||
case 'T': Ts++; break;
|
||||
}
|
||||
}
|
||||
invoker.updateIterationCount();
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Long reduceInit() { return 0L; }
|
||||
public Long reduce(Integer value, Long accum) { return value + accum; }
|
||||
}
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:55 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class CountReadsPerformanceWalker extends ReadWalker<Integer,Long> {
|
||||
private final GATKWalkerInvoker invoker;
|
||||
|
||||
public CountReadsPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
|
||||
this.invoker = walkerInvoker;
|
||||
|
||||
}
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
invoker.updateIterationCount();
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Long reduceInit() { return 0L; }
|
||||
public Long reduce(Integer value, Long accum) { return value + accum; }
|
||||
}
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Apr 22, 2011
|
||||
* Time: 4:02:56 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
||||
@Param
|
||||
private String bamFile;
|
||||
|
||||
@Param
|
||||
private Integer maxReads;
|
||||
|
||||
@Override
|
||||
public String getBAMFile() { return bamFile; }
|
||||
|
||||
@Override
|
||||
public Integer getMaxReads() { return maxReads; }
|
||||
|
||||
@Param
|
||||
private Downsampling downsampling;
|
||||
|
||||
public void timeDownsampling(int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
|
||||
reader.getFileHeader(),
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
0,
|
||||
downsampling.create(),
|
||||
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||
Collections.<SamRecordFilter>emptyList(),
|
||||
false,
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null,
|
||||
(byte)0);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
SampleDataSource sampleDataSource = new SampleDataSource();
|
||||
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
|
||||
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
|
||||
while(locusIteratorByState.hasNext()) {
|
||||
locusIteratorByState.next().getLocation();
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
private enum Downsampling {
|
||||
NONE {
|
||||
@Override
|
||||
DownsamplingMethod create() { return DownsamplingMethod.NONE; }
|
||||
},
|
||||
PER_SAMPLE {
|
||||
@Override
|
||||
DownsamplingMethod create() { return GATKArgumentCollection.getDefaultDownsamplingMethod(); }
|
||||
};
|
||||
abstract DownsamplingMethod create();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:53 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class ExtractTag extends ReadProcessor {
|
||||
private final String tag;
|
||||
|
||||
public ExtractTag(final BAMProcessingPerformanceMeter performanceMeter, final String tag) {
|
||||
super(performanceMeter);
|
||||
this.tag = tag;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTestName() { return "extract tag"; }
|
||||
public void processRead(final SAMRecord read) {
|
||||
read.getAttribute(tag);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:54 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class GATKWalkerBenchmark extends ReadProcessingBenchmark {
|
||||
@Param
|
||||
private String bamFile;
|
||||
|
||||
@Param
|
||||
private Integer maxReads;
|
||||
|
||||
@Param
|
||||
private String referenceFile;
|
||||
|
||||
@Param
|
||||
private WalkerType walkerType;
|
||||
|
||||
@Override
|
||||
public String getBAMFile() { return bamFile; }
|
||||
|
||||
@Override
|
||||
public Integer getMaxReads() { return maxReads; }
|
||||
|
||||
@Override
|
||||
public void setUp() {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
public void timeWalkerPerformance(final int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
|
||||
// Establish the argument collection
|
||||
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||
argCollection.referenceFile = new File(referenceFile);
|
||||
argCollection.samFiles = Collections.singletonList(inputFile.getAbsolutePath());
|
||||
|
||||
engine.setArguments(argCollection);
|
||||
// Bugs in the engine mean that this has to be set twice.
|
||||
engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(inputFile,new Tags())));
|
||||
engine.setFilters(Collections.<SamRecordFilter>singletonList(new UnmappedReadFilter()));
|
||||
engine.setReferenceMetaDataFiles(Collections.<RMDTriplet>emptyList());
|
||||
|
||||
// Create the walker
|
||||
engine.setWalker(walkerType.create());
|
||||
|
||||
engine.execute();
|
||||
}
|
||||
}
|
||||
|
||||
private enum WalkerType {
|
||||
COUNT_READS {
|
||||
@Override
|
||||
Walker create() { return new CountReadsWalker(); }
|
||||
},
|
||||
COUNT_BASES_IN_READ {
|
||||
@Override
|
||||
Walker create() { return new CountBasesInReadPerformanceWalker(); }
|
||||
},
|
||||
COUNT_LOCI {
|
||||
@Override
|
||||
Walker create() {
|
||||
CountLociWalker walker = new CountLociWalker();
|
||||
JVMUtils.setFieldValue(JVMUtils.findField(CountLociWalker.class,"out"),walker,System.out);
|
||||
return walker;
|
||||
}
|
||||
};
|
||||
abstract Walker create();
|
||||
}
|
||||
}
|
||||
|
||||
class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
|
||||
private long As;
|
||||
private long Cs;
|
||||
private long Gs;
|
||||
private long Ts;
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
for(byte base: read.getReadBases()) {
|
||||
switch(base) {
|
||||
case 'A': As++; break;
|
||||
case 'C': Cs++; break;
|
||||
case 'G': Gs++; break;
|
||||
case 'T': Ts++; break;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Long reduceInit() { return 0L; }
|
||||
public Long reduce(Integer value, Long accum) { return value + accum; }
|
||||
}
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:54 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class GATKWalkerInvoker extends ReadProcessor {
|
||||
/**
|
||||
* Walker to run over the existing dataset.
|
||||
*/
|
||||
private Walker<?,?> walker;
|
||||
|
||||
public GATKWalkerInvoker(BAMProcessingPerformanceMeter performanceMeter) {
|
||||
super(performanceMeter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTestName() { return "GATK-CountReads"; }
|
||||
|
||||
public void setWalker(Walker<?,?> walker) {
|
||||
this.walker = walker;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void execute(File samFile, File fastaFile) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
|
||||
// Establish the argument collection
|
||||
GATKArgumentCollection argCollection = new GATKArgumentCollection();
|
||||
argCollection.referenceFile = fastaFile;
|
||||
argCollection.samFiles = Collections.singletonList(samFile.getAbsolutePath());
|
||||
|
||||
engine.setArguments(argCollection);
|
||||
// Bugs in the engine mean that this has to be set twice.
|
||||
engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(samFile,new Tags())));
|
||||
engine.setFilters(Collections.<SamRecordFilter>emptyList());
|
||||
engine.setReferenceMetaDataFiles(Collections.<RMDTriplet>emptyList());
|
||||
|
||||
// Create the walker
|
||||
engine.setWalker(walker);
|
||||
|
||||
startTest();
|
||||
engine.execute();
|
||||
stopTest();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:54 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class InvokeLocusIteratorByState extends ReadProcessor {
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
|
||||
public InvokeLocusIteratorByState(final BAMProcessingPerformanceMeter performanceMeter,DownsamplingMethod downsamplingMethod) {
|
||||
super(performanceMeter);
|
||||
this.downsamplingMethod = downsamplingMethod;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTestName() {
|
||||
if(downsamplingMethod != DownsamplingMethod.NONE)
|
||||
return String.format("invoke locus iterator by state; downsampling by sample to coverage = %d; ",downsamplingMethod.toCoverage);
|
||||
else
|
||||
return String.format("invoke locus iterator by state; no downsampling; ");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getIterationType() { return "loci"; }
|
||||
|
||||
@Override
|
||||
public void execute(File samFile, File fastaFile) {
|
||||
SAMFileReader reader = new SAMFileReader(samFile);
|
||||
ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(samFile,new Tags())),
|
||||
reader.getFileHeader(),
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.SILENT,
|
||||
0,
|
||||
downsamplingMethod,
|
||||
new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
|
||||
Collections.<SamRecordFilter>emptyList(),
|
||||
false,
|
||||
false,
|
||||
BAQ.CalculationMode.OFF,
|
||||
BAQ.QualityMode.DONT_MODIFY,
|
||||
null,
|
||||
(byte)0);
|
||||
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
SampleDataSource sampleDataSource = new SampleDataSource();
|
||||
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
|
||||
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
|
||||
startTest();
|
||||
while(locusIteratorByState.hasNext()) {
|
||||
locusIteratorByState.next();
|
||||
updateIterationCount();
|
||||
}
|
||||
stopTest();
|
||||
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:53 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class IterateOverCigarString extends ReadProcessor {
|
||||
private long matchMismatches;
|
||||
private long insertions;
|
||||
private long deletions;
|
||||
private long others;
|
||||
|
||||
public IterateOverCigarString(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||
super(performanceMeter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTestName() { return "iterator over cigar string"; }
|
||||
public void processRead(final SAMRecord read) {
|
||||
Cigar cigar = read.getCigar();
|
||||
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||
int elementSize = cigarElement.getLength();
|
||||
while(elementSize > 0) {
|
||||
switch(cigarElement.getOperator()) {
|
||||
case M: matchMismatches++; break;
|
||||
case I: insertions++; break;
|
||||
case D: deletions++; break;
|
||||
default: others++; break;
|
||||
}
|
||||
elementSize--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,58 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:53 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class IterateOverEachBase extends ReadProcessor {
|
||||
private long As;
|
||||
private long Cs;
|
||||
private long Gs;
|
||||
private long Ts;
|
||||
|
||||
public IterateOverEachBase(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||
super(performanceMeter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTestName() { return "iterate over each base"; }
|
||||
public void processRead(final SAMRecord read) {
|
||||
for(byte base: read.getReadBases()) {
|
||||
switch(base) {
|
||||
case 'A': As++; break;
|
||||
case 'C': Cs++; break;
|
||||
case 'G': Gs++; break;
|
||||
case 'T': Ts++; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:53 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class NoAdditionalProcessing extends ReadProcessor {
|
||||
public NoAdditionalProcessing(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||
super(performanceMeter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTestName() { return "no additional processing"; }
|
||||
public void processRead(final SAMRecord read) {}
|
||||
}
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import com.google.caliper.SimpleBenchmark;
|
||||
import net.sf.picard.util.SamLocusIterator;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Apr 22, 2011
|
||||
* Time: 3:51:06 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
|
||||
@Param
|
||||
private String bamFile;
|
||||
|
||||
@Param
|
||||
private Integer maxReads;
|
||||
|
||||
@Override
|
||||
public String getBAMFile() { return bamFile; }
|
||||
|
||||
@Override
|
||||
public Integer getMaxReads() { return maxReads; }
|
||||
|
||||
public void timeDecompressBamFile(int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator();
|
||||
while(iterator.hasNext())
|
||||
iterator.next();
|
||||
iterator.close();
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void timeExtractTag(int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator();
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecord read = iterator.next();
|
||||
read.getAttribute("OQ");
|
||||
}
|
||||
iterator.close();
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void timeSamLocusIterator(int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
long loci = 0;
|
||||
|
||||
SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
|
||||
samLocusIterator.setEmitUncoveredLoci(false);
|
||||
Iterator<SamLocusIterator.LocusInfo> workhorseIterator = samLocusIterator.iterator();
|
||||
|
||||
while(workhorseIterator.hasNext()) {
|
||||
SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
|
||||
// Use the value of locusInfo to avoid optimization.
|
||||
if(locusInfo != null) loci++;
|
||||
}
|
||||
System.out.printf("Total loci = %d%n",loci);
|
||||
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -24,47 +24,59 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import net.sf.picard.util.SamLocusIterator;
|
||||
import com.google.caliper.Param;
|
||||
import com.google.caliper.SimpleBenchmark;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMFileWriterFactory;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Iterator;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Feb 25, 2011
|
||||
* Time: 10:16:54 AM
|
||||
* Date: Apr 22, 2011
|
||||
* Time: 4:04:38 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class InvokeSamLocusIterator extends ReadProcessor {
|
||||
public InvokeSamLocusIterator(final BAMProcessingPerformanceMeter performanceMeter) {
|
||||
super(performanceMeter);
|
||||
}
|
||||
public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
|
||||
protected abstract String getBAMFile();
|
||||
protected abstract Integer getMaxReads();
|
||||
|
||||
protected File inputFile;
|
||||
|
||||
@Override
|
||||
public String getTestName() {
|
||||
return String.format("invoke sam locus iterator");
|
||||
}
|
||||
public void setUp() {
|
||||
SAMFileReader fullInputFile = new SAMFileReader(new File(getBAMFile()));
|
||||
|
||||
@Override
|
||||
public String getIterationType() { return "loci"; }
|
||||
|
||||
@Override
|
||||
public void execute(File samFile, File fastaFile) {
|
||||
SAMFileReader reader = new SAMFileReader(samFile);
|
||||
|
||||
SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
|
||||
samLocusIterator.setEmitUncoveredLoci(false);
|
||||
Iterator<SamLocusIterator.LocusInfo> workhorseIterator = samLocusIterator.iterator();
|
||||
|
||||
startTest();
|
||||
while(workhorseIterator.hasNext()) {
|
||||
SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
|
||||
updateIterationCount();
|
||||
File tempFile = null;
|
||||
try {
|
||||
tempFile = File.createTempFile("testfile_"+getMaxReads(),".bam");
|
||||
}
|
||||
stopTest();
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to create temporary BAM",ex);
|
||||
}
|
||||
SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
factory.setCreateIndex(true);
|
||||
SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile);
|
||||
|
||||
reader.close();
|
||||
long numReads = 0;
|
||||
for(SAMRecord read: fullInputFile) {
|
||||
if(numReads++ >= getMaxReads())
|
||||
break;
|
||||
writer.addAlignment(read);
|
||||
}
|
||||
|
||||
writer.close();
|
||||
|
||||
inputFile = tempFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() {
|
||||
inputFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads.performance;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mhanna
|
||||
* Date: Apr 22, 2011
|
||||
* Time: 4:01:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
|
||||
@Param
|
||||
private String bamFile;
|
||||
|
||||
@Param
|
||||
private Integer maxReads;
|
||||
|
||||
@Override
|
||||
public String getBAMFile() { return bamFile; }
|
||||
|
||||
@Override
|
||||
public Integer getMaxReads() { return maxReads; }
|
||||
|
||||
public void timeIterateOverEachBase(int reps) {
|
||||
System.out.printf("Processing " + inputFile);
|
||||
for(int i = 0; i < reps; i++) {
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator();
|
||||
|
||||
long As=0,Cs=0,Gs=0,Ts=0;
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecord read = iterator.next();
|
||||
for(byte base: read.getReadBases()) {
|
||||
switch(base) {
|
||||
case 'A': As++; break;
|
||||
case 'C': Cs++; break;
|
||||
case 'G': Gs++; break;
|
||||
case 'T': Ts++; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.printf("As = %d; Cs = %d; Gs = %d; Ts = %d; total = %d%n",As,Cs,Gs,Ts,As+Cs+Gs+Ts);
|
||||
iterator.close();
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void timeIterateOverCigarString(int reps) {
|
||||
for(int i = 0; i < reps; i++) {
|
||||
long matchMismatches = 0;
|
||||
long insertions = 0;
|
||||
long deletions = 0;
|
||||
long others = 0;
|
||||
|
||||
SAMFileReader reader = new SAMFileReader(inputFile);
|
||||
CloseableIterator<SAMRecord> iterator = reader.iterator();
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecord read = iterator.next();
|
||||
|
||||
Cigar cigar = read.getCigar();
|
||||
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||
int elementSize = cigarElement.getLength();
|
||||
while(elementSize > 0) {
|
||||
switch(cigarElement.getOperator()) {
|
||||
case M: matchMismatches++; break;
|
||||
case I: insertions++; break;
|
||||
case D: deletions++; break;
|
||||
default: others++; break;
|
||||
}
|
||||
elementSize--;
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.printf("Ms = %d; Is = %d; Ds = %d; others = %d; total = %d%n",matchMismatches,insertions,deletions,others,matchMismatches+insertions+deletions+others);
|
||||
|
||||
iterator.close();
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -11,6 +11,7 @@
|
|||
<ibiblio name="reflections-repo" m2compatible="true" root="http://reflections.googlecode.com/svn/repo" />
|
||||
<ibiblio name="java.net" m2compatible="false" root="http://download.java.net/maven/1/" pattern="[organisation]/jars/[artifact]-[revision].[ext]"/>
|
||||
<ibiblio name="maven2-repository.dev.java.net" m2compatible="true" root="http://download.java.net/maven/2/" />
|
||||
<ibiblio name="sonatype" m2compatible="true" root="https://oss.sonatype.org/content/repositories/snapshots" />
|
||||
</resolvers>
|
||||
<modules>
|
||||
<module organisation="edu.mit.broad" resolver="projects" />
|
||||
|
|
@ -24,5 +25,7 @@
|
|||
<module organisation="javax.mail" resolver="java.net" />
|
||||
<module organisation="javax.activation" resolver="java.net" />
|
||||
<module organisation="net.java.dev.jna" resolver="maven2-repository.dev.java.net" />
|
||||
<module organisation="com.google.code.caliper" resolver="sonatype" />
|
||||
<module organisation="com.google.code.gson" revision="1.7-SNAPSHOT" resolver="sonatype" />
|
||||
</modules>
|
||||
</ivysettings>
|
||||
|
|
|
|||
Loading…
Reference in New Issue