/*
 * Copyright (c) 2012 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.variant.variantcontext;

import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.variant.vcf.VCFCodec;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Caliper microbenchmark of parsing a VCF file and of common {@link VariantContext} operations.
 *
 * <p>NOTE: the bodies of {@link #setUp()} and {@code runBenchmark} are commented out pending an
 * update to the new Tribble interface, so at present no VCF record is actually read or decoded and
 * the per-operation functions are created but never invoked.
 */
public class VariantContextBenchmark extends SimpleBenchmark {
    /** Path of the VCF file to benchmark against; the default is a developer-local file. */
    @Param({"/Users/depristo/Desktop/broadLocal/localData/ALL.chr20.merged_beagle_mach.20101123.snps_indels_svs.genotypes.vcf"})
    String vcfFile;

    /** Maximum number of lines to read from the VCF file. */
    @Param({"1000"})
    int linesToRead; // set automatically by framework

    /** Number of samples used by the sample-subsetting operations. */
    @Param({"100"})
    int nSamplesToTake; // set automatically by framework

    /** Number of renamed copies of each record that the MERGE operation merges together. */
    @Param({"10"})
    int dupsToMerge; // set automatically by framework

    /** Operation to benchmark; no default values are listed on the annotation. */
    @Param
    Operation operation; // set automatically by framework

    /** In-memory copy of the VCF text; only assigned by the (currently commented-out) setUp code. */
    private String INPUT_STRING;

    /** The operations that can be benchmarked against each decoded record. */
    public enum Operation {
        READ,
        SUBSET_TO_SAMPLES,
        GET_TYPE,
        GET_ID,
        GET_GENOTYPES,
        GET_ATTRIBUTE_STRING,
        GET_ATTRIBUTE_INT,
        GET_N_SAMPLES,
        GET_GENOTYPES_FOR_SAMPLES,
        GET_GENOTYPES_IN_ORDER_OF_NAME,
        CALC_GENOTYPE_COUNTS,
        MERGE
    }

    /** Only referenced by the commented-out setUp and V13 code below. */
    private GenomeLocParser b37GenomeLocParser;

    /**
     * Prepares the benchmark input. Currently a no-op: the original implementation (kept below)
     * has not yet been ported to the new Tribble interface.
     */
    @Override
    protected void setUp() {
        // TODO -- update for new tribble interface
//        try {
//            ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.b37KGReference));
//            b37GenomeLocParser = new GenomeLocParser(seq);
//        } catch ( FileNotFoundException e) {
//            throw new RuntimeException(e);
//        }
//
//        // read it into a String so that we don't try to benchmark IO issues
//        try {
//            FileInputStream s = new FileInputStream(new File(vcfFile));
//            AsciiLineReader lineReader = new AsciiLineReader(s);
//            int counter = 0;
//            StringBuffer sb = new StringBuffer();
//            while (counter++ < linesToRead ) {
//                String line = lineReader.readLine();
//                if ( line == null )
//                    break;
//                sb.append(line + "\n");
//            }
//            s.close();
//            INPUT_STRING = sb.toString();
//        } catch (IOException e) {
//            throw new RuntimeException(e);
//        }
    }

    /**
     * A single operation applied to each decoded record.
     *
     * @param <T> the feature type produced by the codec under test
     */
    private interface FunctionToBenchmark<T extends Feature> {
        public void run(T vc);
    }

    /**
     * Decodes the cached input with {@code codec} and applies {@code func} to every record.
     * Currently a no-op: the original implementation (kept below) has not yet been ported to the
     * new Tribble interface.
     *
     * @param codec the codec used to decode each line
     * @param func  the operation to apply to each decoded record
     * @param <T>   the feature type shared by the codec and the operation
     */
    private <T extends Feature> void runBenchmark(FeatureCodec<T> codec, FunctionToBenchmark<T> func) {
        // TODO -- update for new Tribble interface
//        try {
//            InputStream is = new ByteArrayInputStream(INPUT_STRING.getBytes());
//            AsciiLineReader lineReader = new AsciiLineReader(is);
//            codec.readHeader(lineReader);
//
//            int counter = 0;
//            while (counter++ < linesToRead ) {
//                String line = lineReader.readLine();
//                if ( line == null )
//                    break;
//
//                T vc = codec.decode(line);
//                func.run(vc);
//            }
//        } catch (Exception e) {
//            System.out.println("Benchmarking run failure because of " + e.getMessage());
//        }
    }

    /**
     * Caliper entry point for the current VariantContext implementation.
     *
     * @param rep number of repetitions requested by the framework
     */
    public void timeV14(int rep) {
        for ( int i = 0; i < rep; i++ ) {
            // a fresh function and codec per repetition, so state cached in the function
            // (e.g. the sample subset) is not shared across repetitions
            FunctionToBenchmark<VariantContext> func = getV14FunctionToBenchmark();
            FeatureCodec<VariantContext> codec = new VCFCodec();
            runBenchmark(codec, func);
        }
    }

    /**
     * Returns the names of the first {@code nSamplesToTake} samples of {@code vc}, in the
     * iteration order of {@code vc.getSampleNames()}.
     *
     * <p>The upper bound is clamped to the number of samples actually present, so a record with
     * fewer samples than requested yields all of its samples instead of making
     * {@link List#subList(int, int)} throw an {@link IndexOutOfBoundsException}.
     */
    private Set<String> takeFirstSamples(final VariantContext vc) {
        final List<String> names = new ArrayList<String>(vc.getSampleNames());
        final int end = Math.min(nSamplesToTake, names.size());
        return new HashSet<String>(names.subList(0, end));
    }

    /**
     * Builds the function implementing the currently selected {@link #operation}.
     *
     * @return a new function object for {@link #operation}
     * @throws IllegalArgumentException if the operation has no implementation here
     */
    public FunctionToBenchmark<VariantContext> getV14FunctionToBenchmark() {
        switch ( operation ) {
            case READ:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        // empty operation: measures decoding only
                    }
                };
            case SUBSET_TO_SAMPLES:
                return new FunctionToBenchmark<VariantContext>() {
                    // computed once, from the first record this function sees
                    Set<String> samples;

                    @Override
                    public void run(final VariantContext vc) {
                        if ( samples == null )
                            samples = takeFirstSamples(vc);
                        VariantContext sub = vc.subContextFromSamples(samples);
                        sub.getNSamples();
                    }
                };
            case GET_TYPE:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getType();
                    }
                };
            case GET_ID:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getID();
                    }
                };
            case GET_GENOTYPES:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getGenotypes().size();
                    }
                };
            case GET_GENOTYPES_FOR_SAMPLES:
                return new FunctionToBenchmark<VariantContext>() {
                    // computed once, from the first record this function sees
                    Set<String> samples;

                    @Override
                    public void run(final VariantContext vc) {
                        if ( samples == null )
                            samples = takeFirstSamples(vc);
                        vc.getGenotypes(samples).size();
                    }
                };
            case GET_ATTRIBUTE_STRING:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getAttribute("AN", null);
                    }
                };
            case GET_ATTRIBUTE_INT:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getAttributeAsInt("AC", 0);
                    }
                };
            case GET_N_SAMPLES:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getNSamples();
                    }
                };
            case GET_GENOTYPES_IN_ORDER_OF_NAME:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        ; // TODO - TEST IS BROKEN
//                        int n = 0;
//                        for ( final Genotype g: vc.getGenotypesOrderedByName() ) n++;
                    }
                };
            case CALC_GENOTYPE_COUNTS:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        vc.getHetCount();
                    }
                };
            case MERGE:
                return new FunctionToBenchmark<VariantContext>() {
                    @Override
                    public void run(final VariantContext vc) {
                        List<VariantContext> toMerge = new ArrayList<VariantContext>();

                        // build dupsToMerge copies of the record, suffixing every sample name
                        // with the copy index so the names differ between copies
                        for ( int i = 0; i < dupsToMerge; i++ ) {
                            GenotypesContext gc = GenotypesContext.create(vc.getNSamples());
                            for ( final Genotype g : vc.getGenotypes() ) {
                                gc.add(new GenotypeBuilder(g).name(g.getSampleName()+"_"+i).make());
                            }
                            toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make());
                        }

                        VariantContextUtils.simpleMerge(toMerge, null,
                                VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
                                VariantContextUtils.GenotypeMergeType.UNSORTED,
                                true, false, "set", false, true);
                    }
                };
            default:
                throw new IllegalArgumentException("Unexpected operation " + operation);
        }
    }

    // --------------------------------------------------------------------------------
    //
    // V13
    //
    // In order to use this, you must move the v13 version from archive and uncomment
    //
    // git mv private/archive/java/src/org/broadinstitute/sting/utils/variantcontext/v13 public/java/test/org/broadinstitute/sting/utils/variantcontext/v13
    //
    // --------------------------------------------------------------------------------

//    public void timeV13(int rep) {
//        for ( int i = 0; i < rep; i++ ) {
//            FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext> func = getV13FunctionToBenchmark();
//            FeatureCodec<org.broadinstitute.variant.variantcontext.v13.VariantContext> codec = new org.broadinstitute.variant.variantcontext.v13.VCFCodec();
//            runBenchmark(codec, func);
//        }
//    }
//
//    public FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext> getV13FunctionToBenchmark() {
//        switch ( operation ) {
//            case READ:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        ; // empty operation
//                    }
//                };
//            case SUBSET_TO_SAMPLES:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    List<String> samples;
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        if ( samples == null )
//                            samples = new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake);
//                        org.broadinstitute.variant.variantcontext.v13.VariantContext sub = vc.subContextFromGenotypes(vc.getGenotypes(samples).values());
//                        sub.getNSamples();
//                    }
//                };
//
//            case GET_TYPE:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getType();
//                    }
//                };
//            case GET_ID:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getID();
//                    }
//                };
//            case GET_GENOTYPES:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getGenotypes().size();
//                    }
//                };
//
//            case GET_GENOTYPES_FOR_SAMPLES:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    Set<String> samples;
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        if ( samples == null )
//                            samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
//                        vc.getGenotypes(samples).size();
//                    }
//                };
//
//            case GET_ATTRIBUTE_STRING:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getExtendedAttribute("AN", null);
//                    }
//                };
//
//            case GET_ATTRIBUTE_INT:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getAttributeAsInt("AC", 0);
//                    }
//                };
//
//            case GET_N_SAMPLES:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getNSamples();
//                    }
//                };
//
//            case GET_GENOTYPES_IN_ORDER_OF_NAME:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        ; // TODO - TEST IS BROKEN
//                        //vc.getGenotypesOrderedByName();
//                    }
//                };
//
//            case CALC_GENOTYPE_COUNTS:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        vc.getHetCount();
//                    }
//                };
//
//            case MERGE:
//                return new FunctionToBenchmark<org.broadinstitute.variant.variantcontext.v13.VariantContext>() {
//                    public void run(final org.broadinstitute.variant.variantcontext.v13.VariantContext vc) {
//                        List<org.broadinstitute.variant.variantcontext.v13.VariantContext> toMerge = new ArrayList<org.broadinstitute.variant.variantcontext.v13.VariantContext>();
//
//                        for ( int i = 0; i < dupsToMerge; i++ ) {
//                            Map<String, org.broadinstitute.variant.variantcontext.v13.Genotype> gc = new HashMap<String, org.broadinstitute.variant.variantcontext.v13.Genotype>();
//                            for ( final org.broadinstitute.variant.variantcontext.v13.Genotype g : vc.getGenotypes().values() ) {
//                                String name = g.getSampleName()+"_"+i;
//                                gc.put(name, new org.broadinstitute.variant.variantcontext.v13.Genotype(name,
//                                        g.getAlleles(), g.getLog10PError(), g.getFilters(), g.getAttributes(), g.isPhased(), g.getLikelihoods().getAsVector()));
//                                toMerge.add(org.broadinstitute.variant.variantcontext.v13.VariantContext.modifyGenotypes(vc, gc));
//                            }
//                        }
//
//                        org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.simpleMerge(b37GenomeLocParser,
//                                toMerge, null,
//                                org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
//                                org.broadinstitute.variant.variantcontext.v13.VariantContextUtils.GenotypeMergeType.UNSORTED,
//                                true, false, "set", false, true);
//                    }
//                };
//
//            default: throw new IllegalArgumentException("Unexpected operation " + operation);
//        }
//    }

    /** Runs this benchmark through the Caliper command-line runner. */
    public static void main(String[] args) {
        com.google.caliper.Runner.main(VariantContextBenchmark.class, args);
    }
}