From cb2e26a00436624c5e3fa7feabea13e6badae2b8 Mon Sep 17 00:00:00 2001
From: aaron
Date: Sat, 6 Nov 2010 18:59:06 +0000
Subject: [PATCH] by request, an indexer tool to create Tribble style indexes outside of the GATK

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4632 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../gatk/refdata/indexer/RMDIndexer.java | 129 ++++++++++++++++++
 packages/RMDIndexer.xml | 12 ++
 2 files changed, 141 insertions(+)
 create mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java
 create mode 100644 packages/RMDIndexer.xml

diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java
new file mode 100644
index 000000000..fa248badb
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java
@@ -0,0 +1,129 @@
+package org.broadinstitute.sting.gatk.refdata.indexer;
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.apache.log4j.Logger;
+import org.broad.tribble.FeatureCodec;
+import org.broad.tribble.index.Index;
+import org.broad.tribble.index.IndexFactory;
+import org.broad.tribble.util.LittleEndianOutputStream;
+import org.broadinstitute.sting.commandline.Argument;
+import org.broadinstitute.sting.commandline.CommandLineProgram;
+import org.broadinstitute.sting.commandline.Input;
+import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
+import org.broadinstitute.sting.utils.Utils;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.Map;
+
+/**
+ * A command-line utility that creates an index for a reference meta data file and writes it to a target
+ * location. This is useful when you're unable to write to the directory in which an index is located,
+ * or if you'd like to pre-index files to save time.
+ */
+public class RMDIndexer extends CommandLineProgram {
+    @Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
+    File inputFileSource = null;
+
+    @Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
+    String inputFileType = null;
+
+    @Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
+    public File referenceFile = null;
+
+    // optional; execute() falls back to the input path plus ".idx". NOTE(review): tagged @Input yet written to - confirm
+    @Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to .idx", required = false)
+    public File indexFile = null;
+
+    @Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
+    IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;
+
+    private static final Logger logger = Logger.getLogger(RMDIndexer.class);
+
+    /**
+     * Validates the arguments, builds the index for the input file and writes it to the index location.
+     *
+     * @return 0 once the index has been written
+     * @throws IllegalArgumentException if the reference cannot be read, the target index file already exists,
+     *                                  or the requested type is not offered by the track builder
+     * @throws Exception anything propagated while building or writing the index
+     */
+    @Override
+    protected int execute() throws Exception {
+        logger.info(String.format("attempting to index file: %s",inputFileSource));
+
+        // check parameters
+        // ---------------------------------------------------------------------------------
+
+        // validate the reference before dereferencing it for the log line below
+        if (referenceFile == null || !referenceFile.canRead())
+            throw new IllegalArgumentException("We can't read the reference file: " +
+                    referenceFile + ", check that it exists, and that you have permissions to read it");
+
+        logger.info(String.format("using reference: %s",referenceFile.getAbsolutePath()));
+        logger.info(String.format("using type: %s",inputFileType));
+
+        // the index location is optional: when it is absent, write next to the input file with ".idx" appended
+        final File targetIndex = (indexFile != null) ? indexFile : new File(inputFileSource.getAbsolutePath() + ".idx");
+        logger.info(String.format("writing to location: %s",targetIndex.getAbsolutePath()));
+
+        // create a reference file reader
+        // NOTE(review): seq is never read afterwards; presumably built so an unusable reference fails early - confirm
+        IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
+
+        // refuse to overwrite an index that is already on disk
+        if (targetIndex.exists())
+            throw new IllegalArgumentException("We can't write to the index file location: " +
+                    targetIndex + ", the index exists");
+
+
+        // try to index the file
+        // ---------------------------------------------------------------------------------
+
+        // get a track builder
+        RMDTrackBuilder builder = new RMDTrackBuilder();
+
+        // find the types available to the track builders
+        // NOTE(review): raw Map as shown; confirm the generic type declared by getAvailableTrackNamesAndTypes()
+        Map typeMapping = builder.getAvailableTrackNamesAndTypes();
+
+        // check that the type is valid
+        if (!typeMapping.containsKey(inputFileType))
+            throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet()));
+
+        // create the codec
+        FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType));
+
+        // get some timing info
+        long currentTime = System.currentTimeMillis();
+
+        Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
+
+        // create the output stream and write the index; the finally block closes the stream even if the write fails
+        LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(targetIndex));
+        try {
+            index.write(stream);
+        } finally {
+            stream.close();
+        }
+
+        // report and exit
+        logger.info("Successfully wrote the index to location: " + targetIndex + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
+        return 0; // return successfully
+    }
+
+
+    /**
+     * the generic call execute main
+     * @param argv the arguments from the command line
+     */
+    public static void main(String[] argv) {
+        try {
+            RMDIndexer instance = new RMDIndexer();
+            start(instance, argv);
+            System.exit(CommandLineProgram.result);
+        } catch (Exception e) {
+            exitSystemWithError(e);
+        }
+    }
+}
diff --git a/packages/RMDIndexer.xml b/packages/RMDIndexer.xml
new file mode 100644
index 000000000..def5bed9c
--- /dev/null
+++ b/packages/RMDIndexer.xml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+