diff --git a/ivy.xml b/ivy.xml index f41596c1e..519eed1b2 100644 --- a/ivy.xml +++ b/ivy.xml @@ -46,7 +46,7 @@ - + @@ -60,6 +60,9 @@ + + + diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/BAMProcessingPerformanceMeter.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/BAMProcessingPerformanceMeter.java deleted file mode 100644 index ef506b1fa..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/BAMProcessingPerformanceMeter.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.utils.SimpleTimer; - -import java.io.File; - -/** - * Basic suite for testing idealized and actual performance of read processing. - */ -public class BAMProcessingPerformanceMeter extends CommandLineProgram { - @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = true) - File samFile; - - @Input(fullName = "reference_file", shortName="R", doc = "Associated FASTA sequence", required = true) - File referenceFile; - - @Argument(fullName="test_repetitions", shortName = "test_reps", doc="Number of times to repeat each test", required = false) - int testRepetitions = 5; - - @Argument(fullName="print_frequency", shortName = "pf", doc="Print cumulative time after x # reads", required = false) - int printFrequency = 100000; - - private void testBAMFileProcessingThroughput(ReadProcessor readProcessor) { - readProcessor.execute(samFile,referenceFile); - } - - public int execute() { - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new NoAdditionalProcessing(this)); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverEachBase(this)); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new IterateOverCigarString(this)); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new ExtractTag(this,"OQ")); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeSamLocusIterator(this)); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, GATKArgumentCollection.getDefaultDownsamplingMethod())); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(new InvokeLocusIteratorByState(this, DownsamplingMethod.NONE)); - GATKWalkerInvoker countReadsInvoker = new GATKWalkerInvoker(this); - CountReadsPerformanceWalker countReadsWalker = new CountReadsPerformanceWalker(countReadsInvoker); - countReadsInvoker.setWalker(countReadsWalker); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countReadsInvoker); - - GATKWalkerInvoker countBasesInReadInvoker = new GATKWalkerInvoker(this); - CountBasesInReadPerformanceWalker countBasesInReadWalker = new CountBasesInReadPerformanceWalker(countBasesInReadInvoker); - countBasesInReadInvoker.setWalker(countBasesInReadWalker); - for(int i = 0; i < testRepetitions; i++) testBAMFileProcessingThroughput(countBasesInReadInvoker); - - return 0; - } - - /** - * Required main method implementation. - * @param argv Command-line argument text. - * @throws Exception on error. - */ - public static void main(String[] argv) throws Exception { - int returnCode = 0; - try { - BAMProcessingPerformanceMeter instance = new BAMProcessingPerformanceMeter(); - start(instance, argv); - returnCode = 0; - } - catch(Exception ex) { - returnCode = 1; - ex.printStackTrace(); - throw ex; - } - finally { - System.exit(returnCode); - } - } -} - -abstract class ReadProcessor { - private final SimpleTimer timer; - private final int printFrequency; - protected int iterations = 0; - - public ReadProcessor(BAMProcessingPerformanceMeter performanceMeter) { - timer = new SimpleTimer("timer"); - this.printFrequency = performanceMeter.printFrequency; - } - - public abstract String getTestName(); - public String getIterationType() { return "loci"; } - - public void processRead(final SAMRecord read) { } - public void execute(File bamFile,File fastaFile) { - SAMFileReader reader = new SAMFileReader(bamFile); - startTest(); - for(SAMRecord read: reader) { - processRead(read); - updateIterationCount(); - } - stopTest(); - reader.close(); - } - - protected void startTest() { - timer.start(); - } - - protected void stopTest() { - timer.stop(); - printStatus("TEST COMPLETE"); - } - - protected void updateIterationCount() { - if(++iterations % printFrequency == 0) printStatus("ONGOING"); - } - - private void printStatus(String prefix) { - System.out.printf("%s: %s printed %d %s in %f seconds.%n",prefix,getTestName(),iterations,getIterationType(),timer.getElapsedTime()); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountBasesInReadPerformanceWalker.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountBasesInReadPerformanceWalker.java deleted file mode 100644 index d40a33892..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountBasesInReadPerformanceWalker.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:55 AM - * To change this template use File | Settings | File Templates. - */ -class CountBasesInReadPerformanceWalker extends ReadWalker { - private long As; - private long Cs; - private long Gs; - private long Ts; - - private final GATKWalkerInvoker invoker; - - public CountBasesInReadPerformanceWalker(GATKWalkerInvoker walkerInvoker) { - this.invoker = walkerInvoker; - - } - - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { - for(byte base: read.getReadBases()) { - switch(base) { - case 'A': As++; break; - case 'C': Cs++; break; - case 'G': Gs++; break; - case 'T': Ts++; break; - } - } - invoker.updateIterationCount(); - return 1; - } - - public Long reduceInit() { return 0L; } - public Long reduce(Integer value, Long accum) { return value + accum; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountReadsPerformanceWalker.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountReadsPerformanceWalker.java deleted file mode 100644 index 3d0771e4b..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/CountReadsPerformanceWalker.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:55 AM - * To change this template use File | Settings | File Templates. - */ -class CountReadsPerformanceWalker extends ReadWalker { - private final GATKWalkerInvoker invoker; - - public CountReadsPerformanceWalker(GATKWalkerInvoker walkerInvoker) { - this.invoker = walkerInvoker; - - } - - public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { - invoker.updateIterationCount(); - return 1; - } - - public Long reduceInit() { return 0L; } - public Long reduce(Integer value, Long accum) { return value + accum; } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/DownsamplerBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/DownsamplerBenchmark.java new file mode 100644 index 000000000..931a04b2d --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/DownsamplerBenchmark.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads.performance; + +import com.google.caliper.Param; +import net.sf.picard.filter.FilteringIterator; +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.DownsamplingMethod; +import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; +import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.baq.BAQ; + +import java.io.File; +import java.util.Collections; +import java.util.Iterator; + +/** + * Created by IntelliJ IDEA. + * User: mhanna + * Date: Apr 22, 2011 + * Time: 4:02:56 PM + * To change this template use File | Settings | File Templates. + */ +public class DownsamplerBenchmark extends ReadProcessingBenchmark { + @Param + private String bamFile; + + @Param + private Integer maxReads; + + @Override + public String getBAMFile() { return bamFile; } + + @Override + public Integer getMaxReads() { return maxReads; } + + @Param + private Downsampling downsampling; + + public void timeDownsampling(int reps) { + for(int i = 0; i < reps; i++) { + SAMFileReader reader = new SAMFileReader(inputFile); + ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(inputFile,new Tags())), + reader.getFileHeader(), + false, + SAMFileReader.ValidationStringency.SILENT, + 0, + downsampling.create(), + new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)), + Collections.emptyList(), + false, + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, + (byte)0); + + GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); + SampleDataSource sampleDataSource = new SampleDataSource(); + sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader()); + + // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? + Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter()); + LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource); + while(locusIteratorByState.hasNext()) { + locusIteratorByState.next().getLocation(); + } + reader.close(); + } + } + + private enum Downsampling { + NONE { + @Override + DownsamplingMethod create() { return DownsamplingMethod.NONE; } + }, + PER_SAMPLE { + @Override + DownsamplingMethod create() { return GATKArgumentCollection.getDefaultDownsamplingMethod(); } + }; + abstract DownsamplingMethod create(); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ExtractTag.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ExtractTag.java deleted file mode 100644 index a2a05bb55..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ExtractTag.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:53 AM - * To change this template use File | Settings | File Templates. - */ -class ExtractTag extends ReadProcessor { - private final String tag; - - public ExtractTag(final BAMProcessingPerformanceMeter performanceMeter, final String tag) { - super(performanceMeter); - this.tag = tag; - } - - @Override - public String getTestName() { return "extract tag"; } - public void processRead(final SAMRecord read) { - read.getAttribute(tag); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerBenchmark.java new file mode 100644 index 000000000..e9b75005c --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerBenchmark.java @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads.performance; + +import com.google.caliper.Param; +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker; +import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker; +import org.broadinstitute.sting.utils.classloader.JVMUtils; + +import java.io.File; +import java.lang.reflect.Field; +import java.util.Collections; + +/** + * Created by IntelliJ IDEA. + * User: mhanna + * Date: Feb 25, 2011 + * Time: 10:16:54 AM + * To change this template use File | Settings | File Templates. + */ +public class GATKWalkerBenchmark extends ReadProcessingBenchmark { + @Param + private String bamFile; + + @Param + private Integer maxReads; + + @Param + private String referenceFile; + + @Param + private WalkerType walkerType; + + @Override + public String getBAMFile() { return bamFile; } + + @Override + public Integer getMaxReads() { return maxReads; } + + @Override + public void setUp() { + super.setUp(); + } + + public void timeWalkerPerformance(final int reps) { + for(int i = 0; i < reps; i++) { + GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + + // Establish the argument collection + GATKArgumentCollection argCollection = new GATKArgumentCollection(); + argCollection.referenceFile = new File(referenceFile); + argCollection.samFiles = Collections.singletonList(inputFile.getAbsolutePath()); + + engine.setArguments(argCollection); + // Bugs in the engine mean that this has to be set twice. + engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(inputFile,new Tags()))); + engine.setFilters(Collections.singletonList(new UnmappedReadFilter())); + engine.setReferenceMetaDataFiles(Collections.emptyList()); + + // Create the walker + engine.setWalker(walkerType.create()); + + engine.execute(); + } + } + + private enum WalkerType { + COUNT_READS { + @Override + Walker create() { return new CountReadsWalker(); } + }, + COUNT_BASES_IN_READ { + @Override + Walker create() { return new CountBasesInReadPerformanceWalker(); } + }, + COUNT_LOCI { + @Override + Walker create() { + CountLociWalker walker = new CountLociWalker(); + JVMUtils.setFieldValue(JVMUtils.findField(CountLociWalker.class,"out"),walker,System.out); + return walker; + } + }; + abstract Walker create(); + } +} + +class CountBasesInReadPerformanceWalker extends ReadWalker { + private long As; + private long Cs; + private long Gs; + private long Ts; + + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { + for(byte base: read.getReadBases()) { + switch(base) { + case 'A': As++; break; + case 'C': Cs++; break; + case 'G': Gs++; break; + case 'T': Ts++; break; + } + } + return 1; + } + + public Long reduceInit() { return 0L; } + public Long reduce(Integer value, Long accum) { return value + accum; } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerInvoker.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerInvoker.java deleted file mode 100644 index 14a2d3e1e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/GATKWalkerInvoker.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.picard.filter.SamRecordFilter; -import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.walkers.Walker; - -import java.io.File; -import java.util.Collections; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:54 AM - * To change this template use File | Settings | File Templates. - */ -class GATKWalkerInvoker extends ReadProcessor { - /** - * Walker to run over the existing dataset. - */ - private Walker walker; - - public GATKWalkerInvoker(BAMProcessingPerformanceMeter performanceMeter) { - super(performanceMeter); - } - - @Override - public String getTestName() { return "GATK-CountReads"; } - - public void setWalker(Walker walker) { - this.walker = walker; - } - - @Override - public void execute(File samFile, File fastaFile) { - GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); - - // Establish the argument collection - GATKArgumentCollection argCollection = new GATKArgumentCollection(); - argCollection.referenceFile = fastaFile; - argCollection.samFiles = Collections.singletonList(samFile.getAbsolutePath()); - - engine.setArguments(argCollection); - // Bugs in the engine mean that this has to be set twice. - engine.setSAMFileIDs(Collections.singletonList(new SAMReaderID(samFile,new Tags()))); - engine.setFilters(Collections.emptyList()); - engine.setReferenceMetaDataFiles(Collections.emptyList()); - - // Create the walker - engine.setWalker(walker); - - startTest(); - engine.execute(); - stopTest(); - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeLocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeLocusIteratorByState.java deleted file mode 100644 index db2280a8e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeLocusIteratorByState.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.picard.filter.FilteringIterator; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.commandline.Tags; -import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource; -import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; -import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.baq.BAQ; - -import java.io.File; -import java.util.Collections; -import java.util.Iterator; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:54 AM - * To change this template use File | Settings | File Templates. - */ -class InvokeLocusIteratorByState extends ReadProcessor { - private final DownsamplingMethod downsamplingMethod; - - public InvokeLocusIteratorByState(final BAMProcessingPerformanceMeter performanceMeter,DownsamplingMethod downsamplingMethod) { - super(performanceMeter); - this.downsamplingMethod = downsamplingMethod; - } - - @Override - public String getTestName() { - if(downsamplingMethod != DownsamplingMethod.NONE) - return String.format("invoke locus iterator by state; downsampling by sample to coverage = %d; ",downsamplingMethod.toCoverage); - else - return String.format("invoke locus iterator by state; no downsampling; "); - } - - @Override - public String getIterationType() { return "loci"; } - - @Override - public void execute(File samFile, File fastaFile) { - SAMFileReader reader = new SAMFileReader(samFile); - ReadProperties readProperties = new ReadProperties(Collections.singletonList(new SAMReaderID(samFile,new Tags())), - reader.getFileHeader(), - false, - SAMFileReader.ValidationStringency.SILENT, - 0, - downsamplingMethod, - new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL)), - Collections.emptyList(), - false, - false, - BAQ.CalculationMode.OFF, - BAQ.QualityMode.DONT_MODIFY, - null, - (byte)0); - - GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); - SampleDataSource sampleDataSource = new SampleDataSource(); - sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader()); - - // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? - Iterator readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter()); - LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource); - startTest(); - while(locusIteratorByState.hasNext()) { - locusIteratorByState.next(); - updateIterationCount(); - } - stopTest(); - - reader.close(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverCigarString.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverCigarString.java deleted file mode 100644 index 5836c0974..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverCigarString.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:53 AM - * To change this template use File | Settings | File Templates. - */ -class IterateOverCigarString extends ReadProcessor { - private long matchMismatches; - private long insertions; - private long deletions; - private long others; - - public IterateOverCigarString(final BAMProcessingPerformanceMeter performanceMeter) { - super(performanceMeter); - } - - @Override - public String getTestName() { return "iterator over cigar string"; } - public void processRead(final SAMRecord read) { - Cigar cigar = read.getCigar(); - for(CigarElement cigarElement: cigar.getCigarElements()) { - int elementSize = cigarElement.getLength(); - while(elementSize > 0) { - switch(cigarElement.getOperator()) { - case M: matchMismatches++; break; - case I: insertions++; break; - case D: deletions++; break; - default: others++; break; - } - elementSize--; - } - } - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverEachBase.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverEachBase.java deleted file mode 100644 index 3c019841d..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/IterateOverEachBase.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:53 AM - * To change this template use File | Settings | File Templates. - */ -class IterateOverEachBase extends ReadProcessor { - private long As; - private long Cs; - private long Gs; - private long Ts; - - public IterateOverEachBase(final BAMProcessingPerformanceMeter performanceMeter) { - super(performanceMeter); - } - - @Override - public String getTestName() { return "iterate over each base"; } - public void processRead(final SAMRecord read) { - for(byte base: read.getReadBases()) { - switch(base) { - case 'A': As++; break; - case 'C': Cs++; break; - case 'G': Gs++; break; - case 'T': Ts++; break; - } - } - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/NoAdditionalProcessing.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/NoAdditionalProcessing.java deleted file mode 100644 index 9e2afd548..000000000 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/NoAdditionalProcessing.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reads.performance; - -import net.sf.samtools.SAMRecord; - -/** - * Created by IntelliJ IDEA. - * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:53 AM - * To change this template use File | Settings | File Templates. - */ -class NoAdditionalProcessing extends ReadProcessor { - public NoAdditionalProcessing(final BAMProcessingPerformanceMeter performanceMeter) { - super(performanceMeter); - } - - @Override - public String getTestName() { return "no additional processing"; } - public void processRead(final SAMRecord read) {} -} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/PicardBaselineBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/PicardBaselineBenchmark.java new file mode 100644 index 000000000..1c22d7663 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/PicardBaselineBenchmark.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads.performance; + +import com.google.caliper.Param; +import com.google.caliper.SimpleBenchmark; +import net.sf.picard.util.SamLocusIterator; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.CloseableIterator; + +import java.io.File; +import java.util.Iterator; + +/** + * Created by IntelliJ IDEA. + * User: mhanna + * Date: Apr 22, 2011 + * Time: 3:51:06 PM + * To change this template use File | Settings | File Templates. + */ +public class PicardBaselineBenchmark extends ReadProcessingBenchmark { + @Param + private String bamFile; + + @Param + private Integer maxReads; + + @Override + public String getBAMFile() { return bamFile; } + + @Override + public Integer getMaxReads() { return maxReads; } + + public void timeDecompressBamFile(int reps) { + for(int i = 0; i < reps; i++) { + SAMFileReader reader = new SAMFileReader(inputFile); + CloseableIterator iterator = reader.iterator(); + while(iterator.hasNext()) + iterator.next(); + iterator.close(); + reader.close(); + } + } + + public void timeExtractTag(int reps) { + for(int i = 0; i < reps; i++) { + SAMFileReader reader = new SAMFileReader(inputFile); + CloseableIterator iterator = reader.iterator(); + while(iterator.hasNext()) { + SAMRecord read = iterator.next(); + read.getAttribute("OQ"); + } + iterator.close(); + reader.close(); + } + } + + public void timeSamLocusIterator(int reps) { + for(int i = 0; i < reps; i++) { + SAMFileReader reader = new SAMFileReader(inputFile); + long loci = 0; + + SamLocusIterator samLocusIterator = new SamLocusIterator(reader); + samLocusIterator.setEmitUncoveredLoci(false); + Iterator workhorseIterator = samLocusIterator.iterator(); + + while(workhorseIterator.hasNext()) { + SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next(); + // Use the value of locusInfo to avoid optimization. + if(locusInfo != null) loci++; + } + System.out.printf("Total loci = %d%n",loci); + + reader.close(); + } + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeSamLocusIterator.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ReadProcessingBenchmark.java similarity index 50% rename from java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeSamLocusIterator.java rename to java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ReadProcessingBenchmark.java index f55dfcec1..206cb9e4a 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/InvokeSamLocusIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/ReadProcessingBenchmark.java @@ -24,47 +24,59 @@ package org.broadinstitute.sting.gatk.datasources.reads.performance; -import net.sf.picard.util.SamLocusIterator; +import com.google.caliper.Param; +import com.google.caliper.SimpleBenchmark; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMFileWriterFactory; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; -import java.util.Iterator; +import java.io.IOException; /** * Created by IntelliJ IDEA. * User: mhanna - * Date: Feb 25, 2011 - * Time: 10:16:54 AM + * Date: Apr 22, 2011 + * Time: 4:04:38 PM * To change this template use File | Settings | File Templates. */ -class InvokeSamLocusIterator extends ReadProcessor { - public InvokeSamLocusIterator(final BAMProcessingPerformanceMeter performanceMeter) { - super(performanceMeter); - } +public abstract class ReadProcessingBenchmark extends SimpleBenchmark { + protected abstract String getBAMFile(); + protected abstract Integer getMaxReads(); + + protected File inputFile; @Override - public String getTestName() { - return String.format("invoke sam locus iterator"); - } + public void setUp() { + SAMFileReader fullInputFile = new SAMFileReader(new File(getBAMFile())); - @Override - public String getIterationType() { return "loci"; } - - @Override - public void execute(File samFile, File fastaFile) { - SAMFileReader reader = new SAMFileReader(samFile); - - SamLocusIterator samLocusIterator = new SamLocusIterator(reader); - samLocusIterator.setEmitUncoveredLoci(false); - Iterator workhorseIterator = samLocusIterator.iterator(); - - startTest(); - while(workhorseIterator.hasNext()) { - SamLocusIterator.LocusInfo locusInfo = workhorseIterator.next(); - updateIterationCount(); + File tempFile = null; + try { + tempFile = File.createTempFile("testfile_"+getMaxReads(),".bam"); } - stopTest(); + catch(IOException ex) { + throw new ReviewedStingException("Unable to create temporary BAM",ex); + } + SAMFileWriterFactory factory = new SAMFileWriterFactory(); + factory.setCreateIndex(true); + SAMFileWriter writer = factory.makeBAMWriter(fullInputFile.getFileHeader(),true,tempFile); - reader.close(); + long numReads = 0; + for(SAMRecord read: fullInputFile) { + if(numReads++ >= getMaxReads()) + break; + writer.addAlignment(read); + } + + writer.close(); + + inputFile = tempFile; + } + + @Override + public void tearDown() { + inputFile.delete(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/TheoreticalMinimaBenchmark.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/TheoreticalMinimaBenchmark.java new file mode 100644 index 000000000..1ad286e97 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/performance/TheoreticalMinimaBenchmark.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.reads.performance; + +import com.google.caliper.Param; +import net.sf.samtools.Cigar; +import net.sf.samtools.CigarElement; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.CloseableIterator; + +import java.io.File; + +/** + * Created by IntelliJ IDEA. + * User: mhanna + * Date: Apr 22, 2011 + * Time: 4:01:23 PM + * To change this template use File | Settings | File Templates. + */ +public class TheoreticalMinimaBenchmark extends ReadProcessingBenchmark { + @Param + private String bamFile; + + @Param + private Integer maxReads; + + @Override + public String getBAMFile() { return bamFile; } + + @Override + public Integer getMaxReads() { return maxReads; } + + public void timeIterateOverEachBase(int reps) { + System.out.printf("Processing " + inputFile); + for(int i = 0; i < reps; i++) { + SAMFileReader reader = new SAMFileReader(inputFile); + CloseableIterator iterator = reader.iterator(); + + long As=0,Cs=0,Gs=0,Ts=0; + while(iterator.hasNext()) { + SAMRecord read = iterator.next(); + for(byte base: read.getReadBases()) { + switch(base) { + case 'A': As++; break; + case 'C': Cs++; break; + case 'G': Gs++; break; + case 'T': Ts++; break; + } + } + } + System.out.printf("As = %d; Cs = %d; Gs = %d; Ts = %d; total = %d%n",As,Cs,Gs,Ts,As+Cs+Gs+Ts); + iterator.close(); + reader.close(); + } + } + + public void timeIterateOverCigarString(int reps) { + for(int i = 0; i < reps; i++) { + long matchMismatches = 0; + long insertions = 0; + long deletions = 0; + long others = 0; + + SAMFileReader reader = new SAMFileReader(inputFile); + CloseableIterator iterator = reader.iterator(); + while(iterator.hasNext()) { + SAMRecord read = iterator.next(); + + Cigar cigar = read.getCigar(); + for(CigarElement cigarElement: cigar.getCigarElements()) { + int elementSize = cigarElement.getLength(); + while(elementSize > 0) { + switch(cigarElement.getOperator()) { + case M: matchMismatches++; break; + case I: insertions++; break; + case D: deletions++; break; + default: others++; break; + } + elementSize--; + } + } + } + System.out.printf("Ms = %d; Is = %d; Ds = %d; others = %d; total = %d%n",matchMismatches,insertions,deletions,others,matchMismatches+insertions+deletions+others); + + iterator.close(); + reader.close(); + } + } + +} diff --git a/settings/ivysettings.xml b/settings/ivysettings.xml index 4a773e3fa..cf9373dda 100644 --- a/settings/ivysettings.xml +++ b/settings/ivysettings.xml @@ -11,6 +11,7 @@ + @@ -24,5 +25,7 @@ + +