First working version of VariantContextBenchmark

This commit is contained in:
Mark DePristo 2011-11-11 09:56:00 -05:00
parent ee40791776
commit e216e85465
1 changed files with 121 additions and 0 deletions

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import com.google.caliper.runner.CaliperMain;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import java.io.*;
import java.util.*;
/**
 * Caliper microbenchmark of parsing a VCF file.
 *
 * <p>{@link #setUp()} loads at most {@code linesToRead} lines of {@code vcfFile} into memory so
 * that the timed methods parse from an in-memory buffer rather than from disk.  Note that this
 * cap counts every line of the file, header lines included.
 *
 * <p>Two operations are timed:
 * <ul>
 *   <li>{@code READ} - decode each record with a fresh {@code VCFCodec};</li>
 *   <li>{@code READ_SUBSET} - decode each record and additionally build a sub-context
 *       restricted to the first {@code nSamplesToTake} samples.</li>
 * </ul>
 */
public class VariantContextBenchmark extends SimpleBenchmark {
    @Param({"/Users/depristo/Desktop/broadLocal/localData/ALL.chr20.merged_beagle_mach.20101123.snps_indels_svs.genotypes.vcf"})
    String vcfFile;

    @Param({"1000"})
    int linesToRead; // set automatically by framework

    @Param({"100"})
    int nSamplesToTake; // set automatically by framework

    /** In-memory copy of the first lines of the VCF, prepared once in {@link #setUp()}. */
    private byte[] inputBytes;

    /** The work performed on each decoded record. */
    private enum Operation {
        READ,
        READ_SUBSET
    }

    /**
     * Reads up to {@code linesToRead} lines of {@code vcfFile} into memory so that the timed
     * methods do not measure disk IO.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    @Override
    protected void setUp() {
        try {
            final FileInputStream fileStream = new FileInputStream(new File(vcfFile));
            try {
                final AsciiLineReader lineReader = new AsciiLineReader(fileStream);
                final StringBuilder buffer = new StringBuilder();
                for ( int nLines = 0; nLines < linesToRead; nLines++ ) {
                    final String line = lineReader.readLine();
                    if ( line == null ) {
                        break;
                    }
                    buffer.append(line).append('\n');
                }
                // Encode once here rather than in every timed repetition.  Uses the platform
                // default charset, exactly as the per-repetition getBytes() call used to.
                inputBytes = buffer.toString().getBytes();
            } finally {
                // close even when reading fails part-way through
                fileStream.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to read VCF file " + vcfFile, e);
        }
    }

    /**
     * Parses the in-memory VCF text with {@code codec}, applying {@code op} to each record.
     *
     * <p>Failures are propagated rather than printed: a run that dies part-way through would
     * otherwise still be reported by the framework as a (meaninglessly fast) timing.
     *
     * @param codec the codec used to read the header and decode each record line
     * @param op    which operation to perform on every decoded record
     * @throws RuntimeException if reading or decoding fails for any reason
     */
    private void parseGenotypes(VCFCodec codec, Operation op) {
        try {
            final InputStream is = new ByteArrayInputStream(inputBytes);
            final AsciiLineReader lineReader = new AsciiLineReader(is);
            codec.readHeader(lineReader);

            List<String> samples = null;
            for ( int nRecords = 0; nRecords < linesToRead; nRecords++ ) {
                final String line = lineReader.readLine();
                if ( line == null ) {
                    break;
                }

                final VariantContext vc = (VariantContext)codec.decode(line);
                if ( op == Operation.READ_SUBSET ) {
                    // the sample subset is only needed (and only computed) when subsetting,
                    // so the plain READ benchmark does no unrelated work
                    if ( samples == null ) {
                        samples = selectSamples(vc);
                    }
                    processOneVC(vc, samples);
                }
            }
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException("Benchmarking run failure because of " + e.getMessage(), e);
        }
    }

    /**
     * Picks the first {@code nSamplesToTake} sample names of {@code vc}.
     *
     * @param vc the record whose sample names are used
     * @return a list holding the first {@code nSamplesToTake} sample names
     * @throws IllegalArgumentException if {@code nSamplesToTake} is negative or larger than
     *                                  the number of samples in {@code vc}
     */
    private List<String> selectSamples(VariantContext vc) {
        final List<String> allSamples = new ArrayList<String>(vc.getSampleNames());
        if ( nSamplesToTake < 0 || nSamplesToTake > allSamples.size() ) {
            throw new IllegalArgumentException("nSamplesToTake=" + nSamplesToTake
                    + " is out of range: the VCF has " + allSamples.size() + " samples");
        }
        return new ArrayList<String>(allSamples.subList(0, nSamplesToTake));
    }

    /**
     * Times decoding every record.
     *
     * @param rep number of repetitions requested by the framework
     */
    public void timeOriginalRead(int rep) {
        for ( int i = 0; i < rep; i++ ) {
            parseGenotypes(new VCFCodec(), Operation.READ);
        }
    }

    /**
     * Times decoding every record and subsetting it to the selected samples.
     *
     * @param rep number of repetitions requested by the framework
     */
    public void timeOriginalReadSubset(int rep) {
        for ( int i = 0; i < rep; i++ ) {
            parseGenotypes(new VCFCodec(), Operation.READ_SUBSET);
        }
    }

    /** Command-line entry point: hands this benchmark class to the Caliper runner. */
    public static void main(String[] args) {
        CaliperMain.main(VariantContextBenchmark.class, args);
    }

    /**
     * Builds the sub-context of {@code vc} restricted to {@code samples} and queries its
     * sample count.
     *
     * @param vc      the decoded record
     * @param samples names of the samples to keep
     */
    private static void processOneVC(VariantContext vc, List<String> samples) {
        final VariantContext sub = vc.subContextFromGenotypes(vc.getGenotypes(samples).values(), vc.getAlleles());
        sub.getNSamples();
    }
}