Ported small BAM performance test suite to the Google Caliper microbenchmarking suite. Looks promising,

but I'm still not sure that Google Caliper (GC) is a good long-term solution.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5683 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-04-22 22:09:17 +00:00
parent 00b57c751b
commit 57a4700299
16 changed files with 516 additions and 700 deletions

View File

@ -46,7 +46,7 @@
<!-- Dependencies for LSF library -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for LSF library -->
<!-- Dependencies for amazon.com S3 support -->
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
<!-- Scala dependencies -->
@ -60,6 +60,9 @@
<dependency org="net.sourceforge.findbugs" name="annotations" rev="1.3.2" conf="default"/>
<dependency org="net.sourceforge.findbugs" name="jsr305" rev="1.3.2" conf="default"/>
<!-- caliper, for benchmarking -->
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="default" />
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="javax.servlet" />
<exclude org="javax.jms" />

View File

@ -1,143 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.utils.SimpleTimer;
import java.io.File;
/**
 * Basic suite for testing idealized and actual performance of read processing.
 *
 * Each scenario is executed {@code testRepetitions} times against the BAM file
 * and reference supplied on the command line.
 */
public class BAMProcessingPerformanceMeter extends CommandLineProgram {
    @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = true)
    File samFile;

    @Input(fullName = "reference_file", shortName="R", doc = "Associated FASTA sequence", required = true)
    File referenceFile;

    @Argument(fullName="test_repetitions", shortName = "test_reps", doc="Number of times to repeat each test", required = false)
    int testRepetitions = 5;

    @Argument(fullName="print_frequency", shortName = "pf", doc="Print cumulative time after x # reads", required = false)
    int printFrequency = 100000;

    /**
     * Runs one scenario against the configured BAM and reference files.
     * @param readProcessor the scenario to execute.
     */
    private void testBAMFileProcessingThroughput(ReadProcessor readProcessor) {
        readProcessor.execute(samFile,referenceFile);
    }

    /**
     * Executes every scenario {@code testRepetitions} times, in a fixed order.
     * @return always 0.
     */
    public int execute() {
        // The raw read-processing scenarios build a fresh processor for every repetition.
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new NoAdditionalProcessing(this));
        }
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new IterateOverEachBase(this));
        }
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new IterateOverCigarString(this));
        }
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new ExtractTag(this,"OQ"));
        }
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new InvokeSamLocusIterator(this));
        }
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, GATKArgumentCollection.getDefaultDownsamplingMethod()));
        }
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, DownsamplingMethod.NONE));
        }

        // The walker scenarios reuse a single invoker (and walker) across all repetitions.
        GATKWalkerInvoker countReadsInvoker = new GATKWalkerInvoker(this);
        CountReadsPerformanceWalker countReadsWalker = new CountReadsPerformanceWalker(countReadsInvoker);
        countReadsInvoker.setWalker(countReadsWalker);
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(countReadsInvoker);
        }

        GATKWalkerInvoker countBasesInReadInvoker = new GATKWalkerInvoker(this);
        CountBasesInReadPerformanceWalker countBasesInReadWalker = new CountBasesInReadPerformanceWalker(countBasesInReadInvoker);
        countBasesInReadInvoker.setWalker(countBasesInReadWalker);
        for(int i = 0; i < testRepetitions; i++) {
            testBAMFileProcessingThroughput(countBasesInReadInvoker);
        }

        return 0;
    }

    /**
     * Required main method implementation.
     *
     * The process always terminates through {@code System.exit}: status 0 when
     * {@code start} returns normally, status 1 when it throws anything.
     *
     * @param argv Command-line argument text.
     * @throws Exception declared for signature compatibility; the JVM exits
     *                   before any exception can propagate to the caller.
     */
    public static void main(String[] argv) throws Exception {
        // Start pessimistic so that any failure, including an Error that a
        // catch(Exception) would miss, can never be reported as success.
        int returnCode = 1;
        try {
            BAMProcessingPerformanceMeter instance = new BAMProcessingPerformanceMeter();
            start(instance, argv);
            returnCode = 0;
        }
        catch(Throwable failure) {
            // Rethrowing would be pointless: System.exit in the finally block
            // pre-empts propagation, so the trace must be printed here.
            failure.printStackTrace();
        }
        finally {
            System.exit(returnCode);
        }
    }
}
/**
 * Base class for one timed pass over a BAM file.
 *
 * The default {@link #execute} iterates over every record in the file, hands
 * each one to {@link #processRead}, and prints progress and completion lines
 * to standard output.
 */
abstract class ReadProcessor {
    private final SimpleTimer timer;
    private final int printFrequency;
    protected int iterations = 0;

    /**
     * @param performanceMeter source of the progress print frequency.
     */
    public ReadProcessor(BAMProcessingPerformanceMeter performanceMeter) {
        timer = new SimpleTimer("timer");
        this.printFrequency = performanceMeter.printFrequency;
    }

    /**
     * @return the label printed for this test in status lines.
     */
    public abstract String getTestName();

    /**
     * @return the unit name printed next to the iteration count.
     */
    // NOTE(review): the default pass in execute() iterates reads, yet the label
    // says "loci" -- confirm whether "reads" was intended here.
    public String getIterationType() { return "loci"; }

    /**
     * Per-record hook; does nothing by default.
     * @param read the record currently being visited.
     */
    public void processRead(final SAMRecord read) { }

    /**
     * Opens the BAM file, times a full pass over its records and closes the
     * reader again, even when processing fails part-way through.
     *
     * @param bamFile   file to read.
     * @param fastaFile reference file; unused by this default implementation.
     */
    public void execute(File bamFile,File fastaFile) {
        SAMFileReader reader = new SAMFileReader(bamFile);
        try {
            startTest();
            for(SAMRecord read: reader) {
                processRead(read);
                updateIterationCount();
            }
            stopTest();
        }
        finally {
            // Release the file handle on the failure path too.
            reader.close();
        }
    }

    /** Starts the timer. */
    protected void startTest() {
        timer.start();
    }

    /** Stops the timer and prints the completion line. */
    protected void stopTest() {
        timer.stop();
        printStatus("TEST COMPLETE");
    }

    /**
     * Counts one iteration and prints a progress line every
     * {@code printFrequency} iterations. A non-positive frequency disables
     * progress output instead of failing with a division by zero.
     */
    protected void updateIterationCount() {
        iterations++;
        if(printFrequency > 0 && iterations % printFrequency == 0) {
            printStatus("ONGOING");
        }
    }

    /**
     * Prints one status line with the test name, iteration count and elapsed time.
     * @param prefix leading tag for the line.
     */
    private void printStatus(String prefix) {
        System.out.printf("%s: %s printed %d %s in %f seconds.%n",prefix,getTestName(),iterations,getIterationType(),timer.getElapsedTime());
    }
}

View File

@ -1,67 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
/**
 * Read walker that tallies the A/C/G/T bases of every read it is given and
 * notifies its invoker once per read.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
    private long As;
    private long Cs;
    private long Gs;
    private long Ts;

    private final GATKWalkerInvoker invoker;

    /**
     * @param walkerInvoker invoker whose iteration count is advanced per read.
     */
    public CountBasesInReadPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
        this.invoker = walkerInvoker;
    }

    /**
     * Tallies the bases of one read; bytes other than A, C, G and T are ignored.
     * @return always 1.
     */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
        final byte[] bases = read.getReadBases();
        for(int offset = 0; offset < bases.length; offset++) {
            final byte base = bases[offset];
            if(base == 'A') {
                As++;
            }
            else if(base == 'C') {
                Cs++;
            }
            else if(base == 'G') {
                Gs++;
            }
            else if(base == 'T') {
                Ts++;
            }
        }
        invoker.updateIterationCount();
        return 1;
    }

    /** @return the initial accumulator, zero. */
    public Long reduceInit() {
        return 0L;
    }

    /** @return the accumulator plus the mapped value. */
    public Long reduce(Integer value, Long accum) {
        return accum + value;
    }
}

View File

@ -1,54 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
/**
 * Read walker that does no per-read work beyond notifying its invoker.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class CountReadsPerformanceWalker extends ReadWalker<Integer,Long> {
    private final GATKWalkerInvoker invoker;

    /**
     * @param walkerInvoker invoker whose iteration count is advanced per read.
     */
    public CountReadsPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
        this.invoker = walkerInvoker;
    }

    /**
     * Advances the invoker's iteration count for one read.
     * @return always 1.
     */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
        invoker.updateIterationCount();
        return 1;
    }

    /** @return the initial accumulator, zero. */
    public Long reduceInit() {
        return 0L;
    }

    /** @return the accumulator plus the mapped value. */
    public Long reduce(Integer value, Long accum) {
        return accum + value;
    }
}

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import com.google.caliper.Param;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections;
import java.util.Iterator;
/**
 * Benchmark that walks a BAM file with {@link LocusIteratorByState} under a
 * selectable downsampling method.
 *
 * @author mhanna
 * Created Apr 22, 2011.
 */
public class DownsamplerBenchmark extends ReadProcessingBenchmark {
    /** Benchmark parameter returned by {@link #getBAMFile()}. */
    @Param
    private String bamFile;

    /** Benchmark parameter returned by {@link #getMaxReads()}. */
    @Param
    private Integer maxReads;

    @Override
    public String getBAMFile() { return bamFile; }

    @Override
    public Integer getMaxReads() { return maxReads; }

    /** Benchmark parameter selecting which downsampling method to use. */
    @Param
    private Downsampling downsampling;

    /**
     * Iterates over every locus of the input file, {@code reps} times.
     *
     * Each repetition opens its own reader on {@code inputFile} (declared
     * outside this class, presumably by the superclass -- TODO confirm) and
     * closes it again even when iteration fails.
     *
     * @param reps number of repetitions to run.
     */
    public void timeDownsampling(int reps) {
        for(int i = 0; i < reps; i++) {
            SAMFileReader reader = new SAMFileReader(inputFile);
            try {
                ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(inputFile,new Tags())),
                        reader.getFileHeader(),
                        false,
                        SAMFileReader.ValidationStringency.SILENT,
                        0,
                        downsampling.create(),
                        new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
                        Collections.<SamRecordFilter>emptyList(),
                        false,
                        false,
                        BAQ.CalculationMode.OFF,
                        BAQ.QualityMode.DONT_MODIFY,
                        null,
                        (byte)0);

                GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
                SampleDataSource sampleDataSource = new SampleDataSource();
                sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());

                // Filter unmapped reads.  TODO: is this always strictly necessary?  Who in the GATK normally filters these out?
                Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
                LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
                while(locusIteratorByState.hasNext()) {
                    locusIteratorByState.next().getLocation();
                }
            }
            finally {
                // Do not leak the file handle when a repetition fails.
                reader.close();
            }
        }
    }

    /**
     * Downsampling choices exposed as a benchmark parameter.
     */
    private enum Downsampling {
        NONE {
            @Override
            DownsamplingMethod create() { return DownsamplingMethod.NONE; }
        },
        PER_SAMPLE {
            @Override
            DownsamplingMethod create() { return GATKArgumentCollection.getDefaultDownsamplingMethod(); }
        };

        /** @return the downsampling method this constant stands for. */
        abstract DownsamplingMethod create();
    }
}

View File

@ -1,49 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.SAMRecord;
/**
 * Scenario that looks up one attribute tag on every read and discards the value.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class ExtractTag extends ReadProcessor {
    /** Name of the attribute requested from each read. */
    private final String tag;

    /**
     * @param performanceMeter passed through to the base class.
     * @param tag              attribute name to request from each read.
     */
    public ExtractTag(final BAMProcessingPerformanceMeter performanceMeter, final String tag) {
        super(performanceMeter);
        this.tag = tag;
    }

    @Override
    public String getTestName() {
        return "extract tag";
    }

    /**
     * Requests the configured attribute; the result is intentionally unused.
     * @param read the record currently being visited.
     */
    public void processRead(final SAMRecord read) {
        read.getAttribute(tag);
    }
}

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import com.google.caliper.Param;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import java.io.File;
import java.lang.reflect.Field;
import java.util.Collections;
/**
 * Benchmark that runs a selectable walker through a freshly configured
 * {@link GenomeAnalysisEngine}.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
public class GATKWalkerBenchmark extends ReadProcessingBenchmark {
    /** Benchmark parameter returned by {@link #getBAMFile()}. */
    @Param
    private String bamFile;

    /** Benchmark parameter returned by {@link #getMaxReads()}. */
    @Param
    private Integer maxReads;

    /** Benchmark parameter: path of the reference handed to the engine. */
    @Param
    private String referenceFile;

    /** Benchmark parameter selecting which walker to run. */
    @Param
    private WalkerType walkerType;

    @Override
    public String getBAMFile() { return bamFile; }

    @Override
    public Integer getMaxReads() { return maxReads; }

    @Override
    public void setUp() {
        super.setUp();
    }

    /**
     * Builds a new engine and walker and executes them, {@code reps} times.
     *
     * {@code inputFile} is declared outside this class, presumably by the
     * superclass -- TODO confirm.
     *
     * @param reps number of repetitions to run.
     */
    public void timeWalkerPerformance(final int reps) {
        for(int i = 0; i < reps; i++) {
            GenomeAnalysisEngine engine = new GenomeAnalysisEngine();

            // Establish the argument collection
            GATKArgumentCollection argCollection = new GATKArgumentCollection();
            argCollection.referenceFile = new File(referenceFile);
            argCollection.samFiles = Collections.singletonList(inputFile.getAbsolutePath());

            engine.setArguments(argCollection);
            // Bugs in the engine mean that this has to be set twice.
            engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(inputFile,new Tags())));
            engine.setFilters(Collections.<SamRecordFilter>singletonList(new UnmappedReadFilter()));
            engine.setReferenceMetaDataFiles(Collections.<RMDTriplet>emptyList());

            // Create the walker
            engine.setWalker(walkerType.create());

            engine.execute();
        }
    }

    /**
     * Walker choices exposed as a benchmark parameter.
     */
    private enum WalkerType {
        COUNT_READS {
            @Override
            Walker<?,?> create() { return new CountReadsWalker(); }
        },
        COUNT_BASES_IN_READ {
            @Override
            Walker<?,?> create() { return new CountBasesInReadPerformanceWalker(); }
        },
        COUNT_LOCI {
            @Override
            Walker<?,?> create() {
                CountLociWalker walker = new CountLociWalker();
                // The walker's "out" field is assigned reflectively to System.out.
                JVMUtils.setFieldValue(JVMUtils.findField(CountLociWalker.class,"out"),walker,System.out);
                return walker;
            }
        };

        /** @return a new walker instance for this constant. */
        abstract Walker<?,?> create();
    }
}
/**
 * Read walker that tallies the A/C/G/T bases of every read it is given.
 */
class CountBasesInReadPerformanceWalker extends ReadWalker<Integer,Long> {
    private long As;
    private long Cs;
    private long Gs;
    private long Ts;

    /**
     * Tallies the bases of one read; bytes other than A, C, G and T are ignored.
     * @return always 1.
     */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
        final byte[] bases = read.getReadBases();
        for(int offset = 0; offset < bases.length; offset++) {
            final byte base = bases[offset];
            if(base == 'A') {
                As++;
            }
            else if(base == 'C') {
                Cs++;
            }
            else if(base == 'G') {
                Gs++;
            }
            else if(base == 'T') {
                Ts++;
            }
        }
        return 1;
    }

    /** @return the initial accumulator, zero. */
    public Long reduceInit() {
        return 0L;
    }

    /** @return the accumulator plus the mapped value. */
    public Long reduce(Integer value, Long accum) {
        return accum + value;
    }
}

View File

@ -1,85 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker;
import java.io.File;
import java.util.Collections;
/**
 * Scenario that times a full {@link GenomeAnalysisEngine} run of a caller-supplied walker.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class GATKWalkerInvoker extends ReadProcessor {
    /**
     * Walker to run over the existing dataset.
     */
    private Walker<?,?> walker;

    /**
     * @param performanceMeter passed through to the base class.
     */
    public GATKWalkerInvoker(BAMProcessingPerformanceMeter performanceMeter) {
        super(performanceMeter);
    }

    // NOTE(review): this label is fixed, so it is reported for whichever walker
    // has been set, not only for a read-counting one -- confirm this is intended.
    @Override
    public String getTestName() {
        return "GATK-CountReads";
    }

    /**
     * @param walker the walker to hand to the engine on the next {@link #execute}.
     */
    public void setWalker(Walker<?,?> walker) {
        this.walker = walker;
    }

    /**
     * Configures a new engine for the given files and times only its
     * {@code execute()} call; engine setup happens before the timer starts.
     *
     * @param samFile   BAM file to process.
     * @param fastaFile reference file handed to the engine.
     */
    @Override
    public void execute(File samFile, File fastaFile) {
        final GenomeAnalysisEngine gatkEngine = new GenomeAnalysisEngine();

        // Establish the argument collection
        final GATKArgumentCollection engineArguments = new GATKArgumentCollection();
        engineArguments.referenceFile = fastaFile;
        engineArguments.samFiles = Collections.singletonList(samFile.getAbsolutePath());
        gatkEngine.setArguments(engineArguments);

        // Bugs in the engine mean that this has to be set twice.
        gatkEngine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(samFile,new Tags())));
        gatkEngine.setFilters(Collections.<SamRecordFilter>emptyList());
        gatkEngine.setReferenceMetaDataFiles(Collections.<RMDTriplet>emptyList());

        // Create the walker
        gatkEngine.setWalker(walker);

        startTest();
        gatkEngine.execute();
        stopTest();
    }
}

View File

@ -1,106 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections;
import java.util.Iterator;
/**
 * Scenario that times a {@link LocusIteratorByState} pass over a BAM file
 * using a given downsampling method.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class InvokeLocusIteratorByState extends ReadProcessor {
    private final DownsamplingMethod downsamplingMethod;

    /**
     * @param performanceMeter   passed through to the base class.
     * @param downsamplingMethod downsampling method placed in the read properties.
     */
    public InvokeLocusIteratorByState(final BAMProcessingPerformanceMeter performanceMeter,DownsamplingMethod downsamplingMethod) {
        super(performanceMeter);
        this.downsamplingMethod = downsamplingMethod;
    }

    /**
     * @return a label that states whether downsampling is active and, if so, the target coverage.
     */
    @Override
    public String getTestName() {
        if(downsamplingMethod != DownsamplingMethod.NONE)
            return String.format("invoke locus iterator by state; downsampling by sample to coverage = %d; ",downsamplingMethod.toCoverage);
        else
            // Plain literal: there is nothing to format in this branch.
            return "invoke locus iterator by state; no downsampling; ";
    }

    @Override
    public String getIterationType() { return "loci"; }

    /**
     * Builds the iterator, then times only the locus-by-locus traversal. The
     * reader is closed even when setup or traversal fails.
     *
     * @param samFile   BAM file to process.
     * @param fastaFile reference file; unused by this scenario.
     */
    @Override
    public void execute(File samFile, File fastaFile) {
        SAMFileReader reader = new SAMFileReader(samFile);
        try {
            ReadProperties readProperties = new ReadProperties(Collections.<SAMReaderID>singletonList(new SAMReaderID(samFile,new Tags())),
                    reader.getFileHeader(),
                    false,
                    SAMFileReader.ValidationStringency.SILENT,
                    0,
                    downsamplingMethod,
                    new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
                    Collections.<SamRecordFilter>emptyList(),
                    false,
                    false,
                    BAQ.CalculationMode.OFF,
                    BAQ.QualityMode.DONT_MODIFY,
                    null,
                    (byte)0);

            GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
            SampleDataSource sampleDataSource = new SampleDataSource();
            sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());

            // Filter unmapped reads.  TODO: is this always strictly necessary?  Who in the GATK normally filters these out?
            Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
            LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);

            startTest();
            while(locusIteratorByState.hasNext()) {
                locusIteratorByState.next();
                updateIterationCount();
            }
            stopTest();
        }
        finally {
            // Do not leak the file handle when the run fails.
            reader.close();
        }
    }
}

View File

@ -1,65 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMRecord;
/**
 * Scenario that walks every position of every CIGAR element of each read and
 * tallies it by operator.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class IterateOverCigarString extends ReadProcessor {
    private long matchMismatches;
    private long insertions;
    private long deletions;
    private long others;

    /**
     * @param performanceMeter passed through to the base class.
     */
    public IterateOverCigarString(final BAMProcessingPerformanceMeter performanceMeter) {
        super(performanceMeter);
    }

    @Override
    public String getTestName() {
        return "iterator over cigar string";
    }

    /**
     * Adds one count per position of each CIGAR element: M, I and D have their
     * own counters, every other operator goes to {@code others}.
     *
     * @param read the record currently being visited.
     */
    public void processRead(final SAMRecord read) {
        final Cigar readCigar = read.getCigar();
        for(final CigarElement element: readCigar.getCigarElements()) {
            // The operator is deliberately looked up once per position, not once per element.
            for(int remaining = element.getLength(); remaining > 0; remaining--) {
                switch(element.getOperator()) {
                    case M:
                        matchMismatches++;
                        break;
                    case I:
                        insertions++;
                        break;
                    case D:
                        deletions++;
                        break;
                    default:
                        others++;
                        break;
                }
            }
        }
    }
}

View File

@ -1,58 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.SAMRecord;
/**
 * Scenario that visits every base of each read and tallies A, C, G and T.
 *
 * @author mhanna
 * Created Feb 25, 2011.
 */
class IterateOverEachBase extends ReadProcessor {
    private long As;
    private long Cs;
    private long Gs;
    private long Ts;

    /**
     * @param performanceMeter passed through to the base class.
     */
    public IterateOverEachBase(final BAMProcessingPerformanceMeter performanceMeter) {
        super(performanceMeter);
    }

    @Override
    public String getTestName() {
        return "iterate over each base";
    }

    /**
     * Tallies the bases of one read; bytes other than A, C, G and T are ignored.
     * @param read the record currently being visited.
     */
    public void processRead(final SAMRecord read) {
        final byte[] readBases = read.getReadBases();
        for(int position = 0; position < readBases.length; position++) {
            switch(readBases[position]) {
                case 'A':
                    As++;
                    break;
                case 'C':
                    Cs++;
                    break;
                case 'G':
                    Gs++;
                    break;
                case 'T':
                    Ts++;
                    break;
            }
        }
    }
}

View File

@ -1,44 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.samtools.SAMRecord;
/**
 * Read processor whose per-read callback has an empty body, i.e. it performs
 * no work of its own on the reads it is handed.
 *
 * Created Feb 25, 2011, 10:16:53 AM.
 *
 * @author mhanna
 */
class NoAdditionalProcessing extends ReadProcessor {
    /**
     * @param performanceMeter meter forwarded unchanged to the superclass.
     */
    public NoAdditionalProcessing(final BAMProcessingPerformanceMeter performanceMeter) {
        super(performanceMeter);
    }

    /**
     * @return the fixed, human-readable name of this test.
     */
    @Override
    public String getTestName() {
        return "no additional processing";
    }

    /**
     * Does nothing with the read.
     *
     * @param read the read being visited; unused.
     */
    public void processRead(final SAMRecord read) {
        // Intentionally empty.
    }
}

View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import net.sf.picard.util.SamLocusIterator;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import java.io.File;
import java.util.Iterator;
/**
 * Caliper benchmarks that exercise plain Picard/samtools access to the BAM
 * file referenced by the inherited {@code inputFile} field: iterating over all
 * reads, fetching one attribute per read, and running a
 * {@link SamLocusIterator} over the file.
 *
 * Every reader and read iterator opened by a benchmark is released in a
 * {@code finally} block so that a failure while reading does not leak it.
 *
 * Created Apr 22, 2011, 3:51:06 PM.
 *
 * @author mhanna
 */
public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
    /** Benchmark parameter: path of the source BAM file. */
    @Param
    private String bamFile;

    /** Benchmark parameter: maximum number of reads. */
    @Param
    private Integer maxReads;

    /**
     * @return the value of the {@code bamFile} parameter.
     */
    @Override
    public String getBAMFile() { return bamFile; }

    /**
     * @return the value of the {@code maxReads} parameter.
     */
    @Override
    public Integer getMaxReads() { return maxReads; }

    /**
     * Reads every record of the input file and discards it.
     *
     * @param reps number of times to repeat the full pass over the file.
     */
    public void timeDecompressBamFile(int reps) {
        for(int i = 0; i < reps; i++) {
            SAMFileReader reader = new SAMFileReader(inputFile);
            try {
                CloseableIterator<SAMRecord> iterator = reader.iterator();
                try {
                    while(iterator.hasNext())
                        iterator.next();
                }
                finally {
                    iterator.close();
                }
            }
            finally {
                reader.close();
            }
        }
    }

    /**
     * Reads every record of the input file and looks up its "OQ" attribute,
     * discarding the result.
     *
     * @param reps number of times to repeat the full pass over the file.
     */
    public void timeExtractTag(int reps) {
        for(int i = 0; i < reps; i++) {
            SAMFileReader reader = new SAMFileReader(inputFile);
            try {
                CloseableIterator<SAMRecord> iterator = reader.iterator();
                try {
                    while(iterator.hasNext()) {
                        SAMRecord read = iterator.next();
                        read.getAttribute("OQ");
                    }
                }
                finally {
                    iterator.close();
                }
            }
            finally {
                reader.close();
            }
        }
    }

    /**
     * Runs a {@link SamLocusIterator} (with uncovered loci suppressed) over the
     * input file, counts the loci it emits and prints the total once per rep.
     *
     * @param reps number of times to repeat the full pass over the file.
     */
    public void timeSamLocusIterator(int reps) {
        for(int i = 0; i < reps; i++) {
            SAMFileReader reader = new SAMFileReader(inputFile);
            try {
                long loci = 0;

                SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
                samLocusIterator.setEmitUncoveredLoci(false);
                Iterator<SamLocusIterator.LocusInfo> workhorseIterator = samLocusIterator.iterator();

                while(workhorseIterator.hasNext()) {
                    SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
                    // Use the value of locusInfo to avoid optimization.
                    if(locusInfo != null) loci++;
                }
                System.out.printf("Total loci = %d%n",loci);
            }
            finally {
                reader.close();
            }
        }
    }
}

View File

@ -24,47 +24,59 @@
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import net.sf.picard.util.SamLocusIterator;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMFileWriterFactory;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.util.Iterator;
import java.io.IOException;
/**
* Created by IntelliJ IDEA.
* User: mhanna
* Date: Feb 25, 2011
* Time: 10:16:54 AM
* Date: Apr 22, 2011
* Time: 4:04:38 PM
* To change this template use File | Settings | File Templates.
*/
class InvokeSamLocusIterator extends ReadProcessor {
public InvokeSamLocusIterator(final BAMProcessingPerformanceMeter performanceMeter) {
super(performanceMeter);
}
public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
protected abstract String getBAMFile();
protected abstract Integer getMaxReads();
protected File inputFile;
@Override
public String getTestName() {
return String.format("invoke sam locus iterator");
}
public void setUp() {
SAMFileReader fullInputFile = new SAMFileReader(new File(getBAMFile()));
@Override
public String getIterationType() { return "loci"; }
@Override
public void execute(File samFile, File fastaFile) {
SAMFileReader reader = new SAMFileReader(samFile);
SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
samLocusIterator.setEmitUncoveredLoci(false);
Iterator<SamLocusIterator.LocusInfo> workhorseIterator = samLocusIterator.iterator();
startTest();
while(workhorseIterator.hasNext()) {
SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
updateIterationCount();
File tempFile = null;
try {
tempFile = File.createTempFile("testfile_"+getMaxReads(),".bam");
}
stopTest();
catch(IOException ex) {
throw new ReviewedStingException("Unable to create temporary BAM",ex);
}
SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(true);
SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile);
reader.close();
long numReads = 0;
for(SAMRecord read: fullInputFile) {
if(numReads++ >= getMaxReads())
break;
writer.addAlignment(read);
}
writer.close();
inputFile = tempFile;
}
/**
 * Deletes the temporary BAM file referenced by {@code inputFile}, if there is one.
 *
 * NOTE(review): the writer that produces this file is created with index
 * creation enabled, so a companion index file may also be left behind --
 * confirm its name and remove it here as well.
 */
@Override
public void tearDown() {
    // inputFile is only assigned by the last statement of setUp(), so it is
    // still null when setUp() failed part-way; avoid a NullPointerException then.
    if(inputFile != null)
        inputFile.delete();
}
}

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads.performance;
import com.google.caliper.Param;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import java.io.File;
/**
 * Caliper benchmarks that read every record of the BAM file referenced by the
 * inherited {@code inputFile} field and do a small amount of per-base work on
 * it: counting bases by letter, or counting CIGAR positions by operator.
 *
 * Every reader and read iterator opened by a benchmark is released in a
 * {@code finally} block so that a failure while reading does not leak it.
 *
 * Created Apr 22, 2011, 4:01:23 PM.
 *
 * @author mhanna
 */
public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
    /** Benchmark parameter: path of the source BAM file. */
    @Param
    private String bamFile;

    /** Benchmark parameter: maximum number of reads. */
    @Param
    private Integer maxReads;

    /**
     * @return the value of the {@code bamFile} parameter.
     */
    @Override
    public String getBAMFile() { return bamFile; }

    /**
     * @return the value of the {@code maxReads} parameter.
     */
    @Override
    public Integer getMaxReads() { return maxReads; }

    /**
     * Counts the A, C, G and T bases of every read in the input file and
     * prints the totals once per rep. Other base values are not counted.
     *
     * @param reps number of times to repeat the full pass over the file.
     */
    public void timeIterateOverEachBase(int reps) {
        // Pass the path as an argument instead of splicing it into the format
        // string: a '%' in the path would otherwise be parsed as a format
        // specifier. The line is also terminated so later output starts cleanly.
        System.out.printf("Processing %s%n", inputFile);
        for(int i = 0; i < reps; i++) {
            SAMFileReader reader = new SAMFileReader(inputFile);
            try {
                CloseableIterator<SAMRecord> iterator = reader.iterator();
                try {
                    long As=0,Cs=0,Gs=0,Ts=0;
                    while(iterator.hasNext()) {
                        SAMRecord read = iterator.next();
                        for(byte base: read.getReadBases()) {
                            switch(base) {
                                case 'A': As++; break;
                                case 'C': Cs++; break;
                                case 'G': Gs++; break;
                                case 'T': Ts++; break;
                            }
                        }
                    }
                    System.out.printf("As = %d; Cs = %d; Gs = %d; Ts = %d; total = %d%n",As,Cs,Gs,Ts,As+Cs+Gs+Ts);
                }
                finally {
                    iterator.close();
                }
            }
            finally {
                reader.close();
            }
        }
    }

    /**
     * Walks the CIGAR of every read in the input file, counting one unit per
     * position of each element (M, I, D, or "other"), and prints the totals
     * once per rep.
     *
     * @param reps number of times to repeat the full pass over the file.
     */
    public void timeIterateOverCigarString(int reps) {
        for(int i = 0; i < reps; i++) {
            long matchMismatches = 0;
            long insertions = 0;
            long deletions = 0;
            long others = 0;

            SAMFileReader reader = new SAMFileReader(inputFile);
            try {
                CloseableIterator<SAMRecord> iterator = reader.iterator();
                try {
                    while(iterator.hasNext()) {
                        SAMRecord read = iterator.next();

                        Cigar cigar = read.getCigar();
                        for(CigarElement cigarElement: cigar.getCigarElements()) {
                            int elementSize = cigarElement.getLength();
                            // One switch per position of the element; kept as-is because
                            // this per-position work is what the benchmark times.
                            while(elementSize > 0) {
                                switch(cigarElement.getOperator()) {
                                    case M: matchMismatches++; break;
                                    case I: insertions++; break;
                                    case D: deletions++; break;
                                    default: others++; break;
                                }
                                elementSize--;
                            }
                        }
                    }
                    System.out.printf("Ms = %d; Is = %d; Ds = %d; others = %d; total = %d%n",matchMismatches,insertions,deletions,others,matchMismatches+insertions+deletions+others);
                }
                finally {
                    iterator.close();
                }
            }
            finally {
                reader.close();
            }
        }
    }
}

View File

@ -11,6 +11,7 @@
<ibiblio name="reflections-repo" m2compatible="true" root="http://reflections.googlecode.com/svn/repo" />
<ibiblio name="java.net" m2compatible="false" root="http://download.java.net/maven/1/" pattern="[organisation]/jars/[artifact]-[revision].[ext]"/>
<ibiblio name="maven2-repository.dev.java.net" m2compatible="true" root="http://download.java.net/maven/2/" />
<ibiblio name="sonatype" m2compatible="true" root="https://oss.sonatype.org/content/repositories/snapshots" />
</resolvers>
<modules>
<module organisation="edu.mit.broad" resolver="projects" />
@ -24,5 +25,7 @@
<module organisation="javax.mail" resolver="java.net" />
<module organisation="javax.activation" resolver="java.net" />
<module organisation="net.java.dev.jna" resolver="maven2-repository.dev.java.net" />
<module organisation="com.google.code.caliper" resolver="sonatype" />
<module organisation="com.google.code.gson" revision="1.7-SNAPSHOT" resolver="sonatype" />
</modules>
</ivysettings>