diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java index f1a6d57c6..c46f012b6 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -52,6 +52,17 @@ import java.util.Map; * * This class keeps track of the available codecs, and knows how to put together a track of * that gets iterators from the FeatureReader using Tribble. + * + * Here's an example run command to find SNPs 200 base pairs up and downstream of the target file. + * + * java -jar dist/GenomeAnalysisTK.jar \ + * -R /broad/1KG/reference/human_b36_both.fasta \ + * -L 1:1863 \ + * -L MT:16520 \ + * -db /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b36.rod \ + * -dbw 200 \ + * -l INFO \ + * -T DbSNPWindowCounter */ public class TribbleRMDTrackBuilder extends PluginManager implements RMDTrackBuilder { /** @@ -92,6 +103,19 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException { // make a feature reader FeatureReader reader; + reader = createFeatureReader(targetClass, inputFile); + // return a feature reader track + return new FeatureReaderTrack(targetClass, name, inputFile, reader); + } + + /** + * create a feature reader of the specified type + * @param targetClass the target codec type + * @param inputFile the input file to create the track from (of the codec type) + * @return the FeatureReader instance + */ + public FeatureReader createFeatureReader(Class targetClass, File inputFile) { + FeatureReader reader = null; try { // check to see if the input file has an index if (requireIndex(inputFile)) { @@ -107,8 +131,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen } catch (IOException e) { throw new StingException("Unable to make the index file for " + inputFile, e); } - // return a feature reader track - return new FeatureReaderTrack(targetClass, name, inputFile, reader); + return reader; } /** @@ -120,13 +143,13 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen */ public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException { LinearIndexCreator create = new LinearIndexCreator(inputFile, codec); - create.setDisplayProgress(false); // don't display progress indicators - + // if we can write the index, we should, but if not just create it in memory - if (new File(inputFile.getAbsoluteFile() + linearIndexExtension).canWrite()) + File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension); + if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite())) return create.createIndex(); else { - logger.info("Unable to write to location " + inputFile.getAbsoluteFile() + linearIndexExtension + " for index file, creating index in memory only"); + logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only"); return create.createIndex(null); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java new file mode 100644 index 000000000..30b84a3dd --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java @@ -0,0 +1,81 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.FeatureIterator; +import org.broad.tribble.FeatureReader; +import org.broad.tribble.dbsnp.DbSNPCodec; +import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.utils.StingException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * DbSNPWindowCounter + * + * Count the number of upstream and downstream dbSNP entries from the current position using the specified window size. + * (really the window size upstream and downstream, so windowSize * 2) + * + * @Author Aaron + * @Date May 7th, 2010 + */ +@By(DataSource.REFERENCE) +@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES}) +public class DbSNPWindowCounter extends LocusWalker { + + // what we read in new tracks with + private FeatureReader reader; + + @Argument(fullName = "dbSNPFile", shortName = "db", doc="The dbsnp file to search upstream and downstream for nearby snps", required = true) + private File myDbSNPFile; + + @Argument(fullName = "dbSNPWindowSize", shortName = "dbw", doc="The distance to look both upstream and downstream for SNPs", required = true) + private int windowSize; + + + public void initialize() { + TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder(); + reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile); + } + + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + FeatureIterator dbSNPs; + + // our upstream and downstream window locations + int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0); + int windowStop = (int)context.getLocation().getStop()+windowSize; + + // query the dnSNP iterator + try { + dbSNPs = reader.query(context.getContig(), + windowStart, + windowStop); + } catch (IOException e) { + throw new StingException("Unable to query dbSNP track due to IO Exception",e); + } + + // count the number of dbSNPs we've seen + int counter = 0; + for (DbSNPFeature feature: dbSNPs) + counter++; + out.println(context.getContig() + ":" + windowStart + "-" + context.getContig() + ":" + windowStop + "=" + + counter + " (dnSNP records)"); + return 1; + } + + public Long reduceInit() { return 0l; } + + public Long reduce(Integer value, Long sum) { + return value + sum; + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java index af39ecd98..b5a5d5ac3 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java @@ -23,6 +23,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks.builders; + import org.broad.tribble.vcf.VCFCodec; import org.broadinstitute.sting.BaseTest; import org.junit.Assert; diff --git a/settings/repository/org.broad/tribble-77.xml b/settings/repository/org.broad/tribble-77.xml deleted file mode 100644 index 0db99f214..000000000 --- a/settings/repository/org.broad/tribble-77.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/settings/repository/org.broad/tribble-77.jar b/settings/repository/org.broad/tribble-79.jar similarity index 82% rename from settings/repository/org.broad/tribble-77.jar rename to settings/repository/org.broad/tribble-79.jar index fcc69ef14..2d2da49da 100644 Binary files a/settings/repository/org.broad/tribble-77.jar and b/settings/repository/org.broad/tribble-79.jar differ diff --git a/settings/repository/org.broad/tribble-79.xml b/settings/repository/org.broad/tribble-79.xml new file mode 100644 index 000000000..9d57d65d0 --- /dev/null +++ b/settings/repository/org.broad/tribble-79.xml @@ -0,0 +1,3 @@ + + +