diff --git a/R/analyzeRodProfile.R b/R/analyzeRodProfile.R new file mode 100755 index 000000000..4d57d72d3 --- /dev/null +++ b/R/analyzeRodProfile.R @@ -0,0 +1,15 @@ +#!/bin/env Rscript + +args <- commandArgs(TRUE) +verbose = TRUE + +d = read.table(args[1],head=T) +outfile = args[2] +title = args[3] + +# ----------------------------------------------------------------------------------------------- +# plot timing +# ----------------------------------------------------------------------------------------------- +pdf(outfile, height=5, width=8) +boxplot(d$walltime ~ d$operation, ylab = "Elapsed wall time in seconds [Log10 Scale]", log="y", main=title, cex.axis=0.75) +dev.off() diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 6bb6d19dc..4a9c6f819 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -297,6 +297,19 @@ public class RefMetaDataTracker { return contexts.iterator().next(); } + /** + * Very simple accessor that gets the first (and only!) VC associated with name at the current location, or + * null if there's no binding here. + * + * @param ref + * @param name + * @param curLocation + * @return + */ + public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) { + return getVariantContext(ref, name, null, curLocation, true); + } + private void addVariantContexts(Collection contexts, RODRecordList rodList, ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java new file mode 100755 index 000000000..fa75542d7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ProfileRodSystem.java @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.util.ParsingUtils; +import org.broad.tribble.vcf.VCFConstants; +import org.broad.tribble.vcf.VCFCodec; +import org.broad.tribble.readers.LineReader; +import org.broad.tribble.readers.AsciiLineReader; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.utils.Utils; + +import java.io.PrintStream; +import java.io.File; +import java.io.FileInputStream; +import java.util.*; + +/** + * Emits specific fields as dictated by the user from one or more VCF files. + */ +@Requires(value={}) +public class ProfileRodSystem extends RodWalker { + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + @Argument(fullName="nIterations", shortName="N", doc="Number of raw reading iterations to perform", required=false) + int nIterations = 1; + + @Argument(fullName="maxRecords", shortName="M", doc="Max. number of records to process", required=false) + int MAX_RECORDS = -1; + + public void initialize() { + File rodFile = getRodFile(); + + out.printf("# walltime is in seconds%n"); + out.printf("# file is %s%n", rodFile); + out.printf("# file size is %d bytes%n", rodFile.length()); + out.printf("operation\titeration\twalltime%n"); + for ( int i = 0; i < nIterations; i++ ) { + out.printf("read.bytes\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_BYTE)); + out.printf("read.line\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_LINE)); + out.printf("line.and.parts\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.BY_PARTS)); + out.printf("decode.loc\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE_LOC)); + out.printf("full.decode\t%d\t%.2f%n", i, readFile(rodFile, ReadMode.DECODE)); + } + + startTimer(); // start up timer for map itself + } + + private enum ReadMode { BY_BYTE, BY_LINE, BY_PARTS, DECODE_LOC, DECODE }; + + private final double readFile(File f, ReadMode mode) { + startTimer(); + + try { + byte[] data = new byte[100000]; + FileInputStream s = new FileInputStream(f); + + if ( mode == ReadMode.BY_BYTE ) { + while (true) { + if ( s.read(data) == -1 ) + break; + } + } else { + int counter = 0; + VCFCodec codec = new VCFCodec(); + String[] parts = new String[100000]; + AsciiLineReader lineReader = new AsciiLineReader(s); + + if ( mode == ReadMode.DECODE_LOC || mode == ReadMode.DECODE ) + codec.readHeader(lineReader); + + while (counter++ < MAX_RECORDS || MAX_RECORDS == -1) { + String line = lineReader.readLine(); + if ( line == null ) + break; + else if ( mode == ReadMode.BY_PARTS ) { + ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR); + } + else if ( mode == ReadMode.DECODE_LOC ) { + codec.decodeLoc(line); + } + else if ( mode == ReadMode.DECODE ) { + processOneVC((VariantContext)codec.decode(line)); + } + } + } + } catch ( Exception e ) { + throw new RuntimeException(e); + } + + return elapsedTime(); + } + + private long startTime = -1l; + private void startTimer() { + startTime = System.currentTimeMillis(); + } + + private double elapsedTime() { + final long curTime = System.currentTimeMillis(); + return (curTime - startTime) / 1000.0; + } + + private File getRodFile() { + List rods = this.getToolkit().getRodDataSources(); + ReferenceOrderedDataSource rod = rods.get(0); + return rod.getReferenceOrderedData().getFile(); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) // RodWalkers can make funky map calls + return 0; + + VariantContext vc = tracker.getVariantContext(ref, "rod", context.getLocation()); + processOneVC(vc); + + return 0; + } + + private static final void processOneVC(VariantContext vc) { + vc.getNSamples(); // force us to parse the samples + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + public void onTraversalDone(Integer sum) { + out.printf("gatk.traversal\t%d\t%.2f%n", 0, elapsedTime()); + } +} \ No newline at end of file