diff --git a/ivy.xml b/ivy.xml
index f41596c1e..519eed1b2 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -46,7 +46,7 @@
-
+
@@ -60,6 +60,9 @@
+
+
+
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/BAMProcessingPerformanceMeter.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/BAMProcessingPerformanceMeter.java
deleted file mode 100644
index ef506b1fa..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/BAMProcessingPerformanceMeter.java
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.CommandLineProgram;
-import org.broadinstitute.sting.commandline.Input;
-import org.broadinstitute.sting.gatk.DownsamplingMethod;
-import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
-import org.broadinstitute.sting.utils.SimpleTimer;
-
-import java.io.File;
-
-/**
- * Basic suite for testing idealized and actual performance of read processing.
- */
-public class BAMProcessingPerformanceMeter extends CommandLineProgram {
- @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = true)
- File samFile;
-
- @Input(fullName = "reference_file", shortName="R", doc = "Associated FASTA sequence", required = true)
- File referenceFile;
-
- @Argument(fullName="test_repetitions", shortName = "test_reps", doc="Number of times to repeat each test", required = false)
- int testRepetitions = 5;
-
- @Argument(fullName="print_frequency", shortName = "pf", doc="Print cumulative time after x # reads", required = false)
- int printFrequency = 100000;
-
- private void testBAMFileProcessingThroughput(ReadProcessor readProcessor) {
- readProcessor.execute(samFile,referenceFile);
- }
-
- public int execute() {
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new NoAdditionalProcessing(this));
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverEachBase(this));
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverCigarString(this));
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new ExtractTag(this,"OQ"));
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeSamLocusIterator(this));
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, GATKArgumentCollection.getDefaultDownsamplingMethod()));
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, DownsamplingMethod.NONE));
- GATKWalkerInvoker countReadsInvoker = new GATKWalkerInvoker(this);
- CountReadsPerformanceWalker countReadsWalker = new CountReadsPerformanceWalker(countReadsInvoker);
- countReadsInvoker.setWalker(countReadsWalker);
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countReadsInvoker);
-
- GATKWalkerInvoker countBasesInReadInvoker = new GATKWalkerInvoker(this);
- CountBasesInReadPerformanceWalker countBasesInReadWalker = new CountBasesInReadPerformanceWalker(countBasesInReadInvoker);
- countBasesInReadInvoker.setWalker(countBasesInReadWalker);
- for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countBasesInReadInvoker);
-
- return 0;
- }
-
- /**
- * Required main method implementation.
- * @param argv Command-line argument text.
- * @throws Exception on error.
- */
- public static void main(String[] argv) throws Exception {
- int returnCode = 0;
- try {
- BAMProcessingPerformanceMeter instance = new BAMProcessingPerformanceMeter();
- start(instance, argv);
- returnCode = 0;
- }
- catch(Exception ex) {
- returnCode = 1;
- ex.printStackTrace();
- throw ex;
- }
- finally {
- System.exit(returnCode);
- }
- }
-}
-
-abstract class ReadProcessor {
- private final SimpleTimer timer;
- private final int printFrequency;
- protected int iterations = 0;
-
- public ReadProcessor(BAMProcessingPerformanceMeter performanceMeter) {
- timer = new SimpleTimer("timer");
- this.printFrequency = performanceMeter.printFrequency;
- }
-
- public abstract String getTestName();
- public String getIterationType() { return "loci"; }
-
- public void processRead(final SAMRecord read) { }
- public void execute(File bamFile,File fastaFile) {
- SAMFileReader reader = new SAMFileReader(bamFile);
- startTest();
- for(SAMRecord read: reader) {
- processRead(read);
- updateIterationCount();
- }
- stopTest();
- reader.close();
- }
-
- protected void startTest() {
- timer.start();
- }
-
- protected void stopTest() {
- timer.stop();
- printStatus("TEST COMPLETE");
- }
-
- protected void updateIterationCount() {
- if(++iterations % printFrequency == 0) printStatus("ONGOING");
- }
-
- private void printStatus(String prefix) {
- System.out.printf("%s: %s printed %d %s in %f seconds.%n",prefix,getTestName(),iterations,getIterationType(),timer.getElapsedTime());
- }
-}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountBasesInReadPerformanceWalker.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountBasesInReadPerformanceWalker.java
deleted file mode 100644
index d40a33892..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountBasesInReadPerformanceWalker.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.ReadWalker;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:55 AM
- * To change this template use File | Settings | File Templates.
- */
-class CountBasesInReadPerformanceWalker extends ReadWalker {
- private long As;
- private long Cs;
- private long Gs;
- private long Ts;
-
- private final GATKWalkerInvoker invoker;
-
- public CountBasesInReadPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
- this.invoker = walkerInvoker;
-
- }
-
- public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
- for(byte base: read.getReadBases()) {
- switch(base) {
- case 'A': As++; break;
- case 'C': Cs++; break;
- case 'G': Gs++; break;
- case 'T': Ts++; break;
- }
- }
- invoker.updateIterationCount();
- return 1;
- }
-
- public Long reduceInit() { return 0L; }
- public Long reduce(Integer value, Long accum) { return value + accum; }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountReadsPerformanceWalker.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountReadsPerformanceWalker.java
deleted file mode 100644
index 3d0771e4b..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountReadsPerformanceWalker.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.ReadWalker;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:55 AM
- * To change this template use File | Settings | File Templates.
- */
-class CountReadsPerformanceWalker extends ReadWalker {
- private final GATKWalkerInvoker invoker;
-
- public CountReadsPerformanceWalker(GATKWalkerInvoker walkerInvoker) {
- this.invoker = walkerInvoker;
-
- }
-
- public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
- invoker.updateIterationCount();
- return 1;
- }
-
- public Long reduceInit() { return 0L; }
- public Long reduce(Integer value, Long accum) { return value + accum; }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/DownsamplerBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/DownsamplerBenchmark.java
new file mode 100644
index 000000000..931a04b2d
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/DownsamplerBenchmark.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads.performance;
+
+import com.google.caliper.Param;
+import net.sf.picard.filter.FilteringIterator;
+import net.sf.picard.filter.SamRecordFilter;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.DownsamplingMethod;
+import org.broadinstitute.sting.gatk.ReadProperties;
+import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
+import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
+import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
+import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.baq.BAQ;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.Iterator;
+
+/**
+ * Caliper-parameterized benchmark that repeatedly runs a LocusIteratorByState over the
+ * input file (unmapped reads filtered out) using the selected Downsampling method.
+ * User: mhanna
+ * Date: Apr 22, 2011
+ * Time: 4:02:56 PM
+ */
+public class DownsamplerBenchmark extends ReadProcessingBenchmark {
+ @Param
+ private String bamFile;
+
+ @Param
+ private Integer maxReads;
+
+ @Override
+ public String getBAMFile() { return bamFile; }
+
+ @Override
+ public Integer getMaxReads() { return maxReads; }
+
+ @Param
+ private Downsampling downsampling;
+
+ public void timeDownsampling(int reps) {
+ for(int i = 0; i < reps; i++) {
+ SAMFileReader reader = new SAMFileReader(inputFile);
+ ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(inputFile,new Tags())),
+ reader.getFileHeader(),
+ false,
+ SAMFileReader.ValidationStringency.SILENT,
+ 0,
+ downsampling.create(),
+ new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
+ Collections.emptyList(),
+ false,
+ false,
+ BAQ.CalculationMode.OFF,
+ BAQ.QualityMode.DONT_MODIFY,
+ null,
+ (byte)0);
+
+ GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
+ SampleDataSource sampleDataSource = new SampleDataSource();
+ sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
+
+ // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
+ Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
+ LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
+ while(locusIteratorByState.hasNext()) {
+ locusIteratorByState.next().getLocation();
+ }
+ reader.close();
+ }
+ }
+
+ private enum Downsampling {
+ NONE {
+ @Override
+ DownsamplingMethod create() { return DownsamplingMethod.NONE; }
+ },
+ PER_SAMPLE {
+ @Override
+ DownsamplingMethod create() { return GATKArgumentCollection.getDefaultDownsamplingMethod(); }
+ };
+ abstract DownsamplingMethod create();
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ExtractTag.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ExtractTag.java
deleted file mode 100644
index a2a05bb55..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ExtractTag.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.SAMRecord;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:53 AM
- * To change this template use File | Settings | File Templates.
- */
-class ExtractTag extends ReadProcessor {
- private final String tag;
-
- public ExtractTag(final BAMProcessingPerformanceMeter performanceMeter, final String tag) {
- super(performanceMeter);
- this.tag = tag;
- }
-
- @Override
- public String getTestName() { return "extract tag"; }
- public void processRead(final SAMRecord read) {
- read.getAttribute(tag);
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerBenchmark.java
new file mode 100644
index 000000000..e9b75005c
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerBenchmark.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads.performance;
+
+import com.google.caliper.Param;
+import net.sf.picard.filter.SamRecordFilter;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.commandline.Tags;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
+import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker;
+import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
+import org.broadinstitute.sting.utils.classloader.JVMUtils;
+
+import java.io.File;
+import java.lang.reflect.Field;
+import java.util.Collections;
+
+/**
+ * Caliper-parameterized benchmark that repeatedly runs a GenomeAnalysisEngine over the
+ * input file with the walker chosen by WalkerType, filtering out unmapped reads.
+ * User: mhanna
+ * Date: Feb 25, 2011
+ * Time: 10:16:54 AM
+ */
+public class GATKWalkerBenchmark extends ReadProcessingBenchmark {
+ @Param
+ private String bamFile;
+
+ @Param
+ private Integer maxReads;
+
+ @Param
+ private String referenceFile;
+
+ @Param
+ private WalkerType walkerType;
+
+ @Override
+ public String getBAMFile() { return bamFile; }
+
+ @Override
+ public Integer getMaxReads() { return maxReads; }
+
+ @Override
+ public void setUp() {
+ super.setUp();
+ }
+
+ public void timeWalkerPerformance(final int reps) {
+ for(int i = 0; i < reps; i++) {
+ GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
+
+ // Establish the argument collection
+ GATKArgumentCollection argCollection = new GATKArgumentCollection();
+ argCollection.referenceFile = new File(referenceFile);
+ argCollection.samFiles = Collections.singletonList(inputFile.getAbsolutePath());
+
+ engine.setArguments(argCollection);
+ // Bugs in the engine mean the input file has to be set twice: in argCollection.samFiles above and via setSAMFileIDs here.
+ engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(inputFile,new Tags())));
+ engine.setFilters(Collections.singletonList(new UnmappedReadFilter()));
+ engine.setReferenceMetaDataFiles(Collections.emptyList());
+
+ // Create the walker
+ engine.setWalker(walkerType.create());
+
+ engine.execute();
+ }
+ }
+
+ private enum WalkerType {
+ COUNT_READS {
+ @Override
+ Walker create() { return new CountReadsWalker(); }
+ },
+ COUNT_BASES_IN_READ {
+ @Override
+ Walker create() { return new CountBasesInReadPerformanceWalker(); }
+ },
+ COUNT_LOCI {
+ @Override
+ Walker create() {
+ CountLociWalker walker = new CountLociWalker();
+ JVMUtils.setFieldValue(JVMUtils.findField(CountLociWalker.class,"out"),walker,System.out);
+ return walker;
+ }
+ };
+ abstract Walker create();
+ }
+}
+
+class CountBasesInReadPerformanceWalker extends ReadWalker {
+ private long As;
+ private long Cs;
+ private long Gs;
+ private long Ts;
+
+ public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
+ for(byte base: read.getReadBases()) {
+ switch(base) {
+ case 'A': As++; break;
+ case 'C': Cs++; break;
+ case 'G': Gs++; break;
+ case 'T': Ts++; break;
+ }
+ }
+ return 1;
+ }
+
+ public Long reduceInit() { return 0L; }
+ public Long reduce(Integer value, Long accum) { return value + accum; }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerInvoker.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerInvoker.java
deleted file mode 100644
index 14a2d3e1e..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerInvoker.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.picard.filter.SamRecordFilter;
-import org.broadinstitute.sting.commandline.Tags;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
-import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
-import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
-import org.broadinstitute.sting.gatk.walkers.Walker;
-
-import java.io.File;
-import java.util.Collections;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:54 AM
- * To change this template use File | Settings | File Templates.
- */
-class GATKWalkerInvoker extends ReadProcessor {
- /**
- * Walker to run over the existing dataset.
- */
- private Walker<?,?> walker;
-
- public GATKWalkerInvoker(BAMProcessingPerformanceMeter performanceMeter) {
- super(performanceMeter);
- }
-
- @Override
- public String getTestName() { return "GATK-CountReads"; }
-
- public void setWalker(Walker<?,?> walker) {
- this.walker = walker;
- }
-
- @Override
- public void execute(File samFile, File fastaFile) {
- GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
-
- // Establish the argument collection
- GATKArgumentCollection argCollection = new GATKArgumentCollection();
- argCollection.referenceFile = fastaFile;
- argCollection.samFiles = Collections.singletonList(samFile.getAbsolutePath());
-
- engine.setArguments(argCollection);
- // Bugs in the engine mean that this has to be set twice.
- engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(samFile,new Tags())));
- engine.setFilters(Collections.emptyList());
- engine.setReferenceMetaDataFiles(Collections.emptyList());
-
- // Create the walker
- engine.setWalker(walker);
-
- startTest();
- engine.execute();
- stopTest();
- }
-
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeLocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeLocusIteratorByState.java
deleted file mode 100644
index db2280a8e..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeLocusIteratorByState.java
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.picard.filter.FilteringIterator;
-import net.sf.picard.filter.SamRecordFilter;
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.commandline.Tags;
-import org.broadinstitute.sting.gatk.DownsamplingMethod;
-import org.broadinstitute.sting.gatk.ReadProperties;
-import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
-import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
-import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
-import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
-import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
-import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.baq.BAQ;
-
-import java.io.File;
-import java.util.Collections;
-import java.util.Iterator;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:54 AM
- * To change this template use File | Settings | File Templates.
- */
-class InvokeLocusIteratorByState extends ReadProcessor {
- private final DownsamplingMethod downsamplingMethod;
-
- public InvokeLocusIteratorByState(final BAMProcessingPerformanceMeter performanceMeter,DownsamplingMethod downsamplingMethod) {
- super(performanceMeter);
- this.downsamplingMethod = downsamplingMethod;
- }
-
- @Override
- public String getTestName() {
- if(downsamplingMethod != DownsamplingMethod.NONE)
- return String.format("invoke locus iterator by state; downsampling by sample to coverage = %d; ",downsamplingMethod.toCoverage);
- else
- return String.format("invoke locus iterator by state; no downsampling; ");
- }
-
- @Override
- public String getIterationType() { return "loci"; }
-
- @Override
- public void execute(File samFile, File fastaFile) {
- SAMFileReader reader = new SAMFileReader(samFile);
- ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(samFile,new Tags())),
- reader.getFileHeader(),
- false,
- SAMFileReader.ValidationStringency.SILENT,
- 0,
- downsamplingMethod,
- new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)),
- Collections.emptyList(),
- false,
- false,
- BAQ.CalculationMode.OFF,
- BAQ.QualityMode.DONT_MODIFY,
- null,
- (byte)0);
-
- GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
- SampleDataSource sampleDataSource = new SampleDataSource();
- sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
-
- // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
- Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
- LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
- startTest();
- while(locusIteratorByState.hasNext()) {
- locusIteratorByState.next();
- updateIterationCount();
- }
- stopTest();
-
- reader.close();
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverCigarString.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverCigarString.java
deleted file mode 100644
index 5836c0974..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverCigarString.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.Cigar;
-import net.sf.samtools.CigarElement;
-import net.sf.samtools.SAMRecord;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:53 AM
- * To change this template use File | Settings | File Templates.
- */
-class IterateOverCigarString extends ReadProcessor {
- private long matchMismatches;
- private long insertions;
- private long deletions;
- private long others;
-
- public IterateOverCigarString(final BAMProcessingPerformanceMeter performanceMeter) {
- super(performanceMeter);
- }
-
- @Override
- public String getTestName() { return "iterator over cigar string"; }
- public void processRead(final SAMRecord read) {
- Cigar cigar = read.getCigar();
- for(CigarElement cigarElement: cigar.getCigarElements()) {
- int elementSize = cigarElement.getLength();
- while(elementSize > 0) {
- switch(cigarElement.getOperator()) {
- case M: matchMismatches++; break;
- case I: insertions++; break;
- case D: deletions++; break;
- default: others++; break;
- }
- elementSize--;
- }
- }
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverEachBase.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverEachBase.java
deleted file mode 100644
index 3c019841d..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverEachBase.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.SAMRecord;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:53 AM
- * To change this template use File | Settings | File Templates.
- */
-class IterateOverEachBase extends ReadProcessor {
- private long As;
- private long Cs;
- private long Gs;
- private long Ts;
-
- public IterateOverEachBase(final BAMProcessingPerformanceMeter performanceMeter) {
- super(performanceMeter);
- }
-
- @Override
- public String getTestName() { return "iterate over each base"; }
- public void processRead(final SAMRecord read) {
- for(byte base: read.getReadBases()) {
- switch(base) {
- case 'A': As++; break;
- case 'C': Cs++; break;
- case 'G': Gs++; break;
- case 'T': Ts++; break;
- }
- }
- }
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/NoAdditionalProcessing.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/NoAdditionalProcessing.java
deleted file mode 100644
index 9e2afd548..000000000
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/NoAdditionalProcessing.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads.performance;
-
-import net.sf.samtools.SAMRecord;
-
-/**
- * Created by IntelliJ IDEA.
- * User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:53 AM
- * To change this template use File | Settings | File Templates.
- */
-class NoAdditionalProcessing extends ReadProcessor {
- public NoAdditionalProcessing(final BAMProcessingPerformanceMeter performanceMeter) {
- super(performanceMeter);
- }
-
- @Override
- public String getTestName() { return "no additional processing"; }
- public void processRead(final SAMRecord read) {}
-}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/PicardBaselineBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/PicardBaselineBenchmark.java
new file mode 100644
index 000000000..1c22d7663
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/PicardBaselineBenchmark.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads.performance;
+
+import com.google.caliper.Param;
+import com.google.caliper.SimpleBenchmark;
+import net.sf.picard.util.SamLocusIterator;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.CloseableIterator;
+
+import java.io.File;
+import java.util.Iterator;
+
+/**
+ * Caliper benchmarks that measure baseline Picard read-processing costs.
+ *
+ * Every {@code time*} method reopens {@code inputFile} on each repetition, so one
+ * repetition is one complete pass over that file.
+ *
+ * User: mhanna
+ * Date: Apr 22, 2011
+ * Time: 3:51:06 PM
+ */
+public class PicardBaselineBenchmark extends ReadProcessingBenchmark {
+    /** Benchmark parameter: path of the BAM file returned by {@link #getBAMFile()}. */
+    @Param
+    private String bamFile;
+
+    /** Benchmark parameter: read limit returned by {@link #getMaxReads()}. */
+    @Param
+    private Integer maxReads;
+
+    /** @return the BAM file path supplied as a benchmark parameter. */
+    @Override
+    public String getBAMFile() { return bamFile; }
+
+    /** @return the read limit supplied as a benchmark parameter. */
+    @Override
+    public Integer getMaxReads() { return maxReads; }
+
+    /**
+     * Iterates over every record of {@code inputFile}, discarding each one.
+     *
+     * @param reps number of complete passes to make over the file.
+     */
+    public void timeDecompressBamFile(int reps) {
+        for(int i = 0; i < reps; i++) {
+            SAMFileReader reader = new SAMFileReader(inputFile);
+            try {
+                CloseableIterator<SAMRecord> iterator = reader.iterator();
+                try {
+                    while(iterator.hasNext())
+                        iterator.next();
+                }
+                finally {
+                    iterator.close();
+                }
+            }
+            finally {
+                // Close in finally so a failed pass does not leak the file handle.
+                reader.close();
+            }
+        }
+    }
+
+    /**
+     * Iterates over every record of {@code inputFile} and looks up its "OQ" attribute.
+     *
+     * @param reps number of complete passes to make over the file.
+     */
+    public void timeExtractTag(int reps) {
+        for(int i = 0; i < reps; i++) {
+            SAMFileReader reader = new SAMFileReader(inputFile);
+            try {
+                CloseableIterator<SAMRecord> iterator = reader.iterator();
+                try {
+                    while(iterator.hasNext()) {
+                        SAMRecord read = iterator.next();
+                        read.getAttribute("OQ");
+                    }
+                }
+                finally {
+                    iterator.close();
+                }
+            }
+            finally {
+                reader.close();
+            }
+        }
+    }
+
+    /**
+     * Walks {@code inputFile} with a {@link SamLocusIterator} that skips uncovered loci,
+     * printing the number of loci seen after each pass.
+     *
+     * @param reps number of complete passes to make over the file.
+     */
+    public void timeSamLocusIterator(int reps) {
+        for(int i = 0; i < reps; i++) {
+            SAMFileReader reader = new SAMFileReader(inputFile);
+            try {
+                long loci = 0;
+
+                SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
+                samLocusIterator.setEmitUncoveredLoci(false);
+                Iterator<SamLocusIterator.LocusInfo> workhorseIterator = samLocusIterator.iterator();
+
+                while(workhorseIterator.hasNext()) {
+                    SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
+                    // Use the value of locusInfo to avoid optimization.
+                    if(locusInfo != null) loci++;
+                }
+                System.out.printf("Total loci = %d%n",loci);
+            }
+            finally {
+                reader.close();
+            }
+        }
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeSamLocusIterator.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ReadProcessingBenchmark.java
similarity index 50%
rename from java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeSamLocusIterator.java
rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ReadProcessingBenchmark.java
index f55dfcec1..206cb9e4a 100644
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeSamLocusIterator.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ReadProcessingBenchmark.java
@@ -24,47 +24,59 @@
package org.broadinstitute.sting.gatk.datasources.reads.performance;
-import net.sf.picard.util.SamLocusIterator;
+import com.google.caliper.Param;
+import com.google.caliper.SimpleBenchmark;
import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMFileWriterFactory;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
-import java.util.Iterator;
+import java.io.IOException;
/**
* Created by IntelliJ IDEA.
* User: mhanna
- * Date: Feb 25, 2011
- * Time: 10:16:54 AM
+ * Date: Apr 22, 2011
+ * Time: 4:04:38 PM
* To change this template use File | Settings | File Templates.
*/
-class InvokeSamLocusIterator extends ReadProcessor {
- public InvokeSamLocusIterator(final BAMProcessingPerformanceMeter performanceMeter) {
- super(performanceMeter);
- }
+public abstract class ReadProcessingBenchmark extends SimpleBenchmark {
+ protected abstract String getBAMFile();
+ protected abstract Integer getMaxReads();
+
+ protected File inputFile;
@Override
- public String getTestName() {
- return String.format("invoke sam locus iterator");
- }
+ public void setUp() {
+ SAMFileReader fullInputFile = new SAMFileReader(new File(getBAMFile()));
- @Override
- public String getIterationType() { return "loci"; }
-
- @Override
- public void execute(File samFile, File fastaFile) {
- SAMFileReader reader = new SAMFileReader(samFile);
-
- SamLocusIterator samLocusIterator = new SamLocusIterator(reader);
- samLocusIterator.setEmitUncoveredLoci(false);
- Iterator workhorseIterator = samLocusIterator.iterator();
-
- startTest();
- while(workhorseIterator.hasNext()) {
- SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next();
- updateIterationCount();
+ File tempFile = null;
+ try {
+ tempFile = File.createTempFile("testfile_"+getMaxReads(),".bam");
}
- stopTest();
+ catch(IOException ex) {
+ throw new ReviewedStingException("Unable to create temporary BAM",ex);
+ }
+ SAMFileWriterFactory factory = new SAMFileWriterFactory();
+ factory.setCreateIndex(true);
+ SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile);
- reader.close();
+ long numReads = 0;
+ for(SAMRecord read: fullInputFile) {
+ if(numReads++ >= getMaxReads())
+ break;
+ writer.addAlignment(read);
+ }
+
+ writer.close();
+
+ inputFile = tempFile;
+ }
+
+ @Override
+ public void tearDown() {
+ inputFile.delete();
}
}
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/TheoreticalMinimaBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/TheoreticalMinimaBenchmark.java
new file mode 100644
index 000000000..1ad286e97
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/TheoreticalMinimaBenchmark.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads.performance;
+
+import com.google.caliper.Param;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.CloseableIterator;
+
+import java.io.File;
+
+/**
+ * Caliper benchmarks for minimal per-read work: one pass over every base and one
+ * pass over every CIGAR element of each record in {@code inputFile}.
+ *
+ * User: mhanna
+ * Date: Apr 22, 2011
+ * Time: 4:01:23 PM
+ */
+public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark {
+    /** Benchmark parameter: path of the BAM file returned by {@link #getBAMFile()}. */
+    @Param
+    private String bamFile;
+
+    /** Benchmark parameter: read limit returned by {@link #getMaxReads()}. */
+    @Param
+    private Integer maxReads;
+
+    /** @return the BAM file path supplied as a benchmark parameter. */
+    @Override
+    public String getBAMFile() { return bamFile; }
+
+    /** @return the read limit supplied as a benchmark parameter. */
+    @Override
+    public Integer getMaxReads() { return maxReads; }
+
+    /**
+     * Counts the A, C, G and T bases of every record in {@code inputFile} and prints
+     * the totals after each pass. Bases other than those four are not counted.
+     *
+     * @param reps number of complete passes to make over the file.
+     */
+    public void timeIterateOverEachBase(int reps) {
+        // Pass the path as an argument: used as the format string itself, a '%' in
+        // the path would make printf throw.
+        System.out.printf("Processing %s%n",inputFile);
+        for(int i = 0; i < reps; i++) {
+            SAMFileReader reader = new SAMFileReader(inputFile);
+            try {
+                CloseableIterator<SAMRecord> iterator = reader.iterator();
+                try {
+                    long As=0,Cs=0,Gs=0,Ts=0;
+                    while(iterator.hasNext()) {
+                        SAMRecord read = iterator.next();
+                        for(byte base: read.getReadBases()) {
+                            switch(base) {
+                                case 'A': As++; break;
+                                case 'C': Cs++; break;
+                                case 'G': Gs++; break;
+                                case 'T': Ts++; break;
+                            }
+                        }
+                    }
+                    System.out.printf("As = %d; Cs = %d; Gs = %d; Ts = %d; total = %d%n",As,Cs,Gs,Ts,As+Cs+Gs+Ts);
+                }
+                finally {
+                    iterator.close();
+                }
+            }
+            finally {
+                reader.close();
+            }
+        }
+    }
+
+    /**
+     * Expands every CIGAR element of every record in {@code inputFile} one position at
+     * a time, tallying M, I, D and all other operators, and prints the totals after
+     * each pass.
+     *
+     * @param reps number of complete passes to make over the file.
+     */
+    public void timeIterateOverCigarString(int reps) {
+        for(int i = 0; i < reps; i++) {
+            long matchMismatches = 0;
+            long insertions = 0;
+            long deletions = 0;
+            long others = 0;
+
+            SAMFileReader reader = new SAMFileReader(inputFile);
+            try {
+                CloseableIterator<SAMRecord> iterator = reader.iterator();
+                try {
+                    while(iterator.hasNext()) {
+                        SAMRecord read = iterator.next();
+
+                        Cigar cigar = read.getCigar();
+                        for(CigarElement cigarElement: cigar.getCigarElements()) {
+                            int elementSize = cigarElement.getLength();
+                            // One increment per position of the element; presumably intentional, since per-position cost is what is timed.
+                            while(elementSize > 0) {
+                                switch(cigarElement.getOperator()) {
+                                    case M: matchMismatches++; break;
+                                    case I: insertions++; break;
+                                    case D: deletions++; break;
+                                    default: others++; break;
+                                }
+                                elementSize--;
+                            }
+                        }
+                    }
+                    System.out.printf("Ms = %d; Is = %d; Ds = %d; others = %d; total = %d%n",matchMismatches,insertions,deletions,others,matchMismatches+insertions+deletions+others);
+                }
+                finally {
+                    iterator.close();
+                }
+            }
+            finally {
+                reader.close();
+            }
+        }
+    }
+}
diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml
index 4a773e3fa..cf9373dda 100644
--- a/settings/ivysettings.xml
+++ b/settings/ivysettings.xml
@@ -11,6 +11,7 @@
+
@@ -24,5 +25,7 @@
+
+