example windowed ROD walker for Kristian, and updates to Tribble

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3325 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-07 17:12:50 +00:00
parent 57f254b13a
commit 7d2df3f511
6 changed files with 114 additions and 9 deletions

View File

@ -52,6 +52,17 @@ import java.util.Map;
*
* This class keeps track of the available codecs, and knows how to put together a track of
* that gets iterators from the FeatureReader using Tribble.
*
* Here's an example run command to find SNPs 200 base pairs up and downstream of the target file.
*
* java -jar dist/GenomeAnalysisTK.jar \
* -R /broad/1KG/reference/human_b36_both.fasta \
* -L 1:1863 \
* -L MT:16520 \
* -db /humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b36.rod \
* -dbw 200 \
* -l INFO \
* -T DbSNPWindowCounter
*/
public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implements RMDTrackBuilder {
/**
@ -92,6 +103,19 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
// make a feature reader
FeatureReader reader;
reader = createFeatureReader(targetClass, inputFile);
// return a feature reader track
return new FeatureReaderTrack(targetClass, name, inputFile, reader);
}
/**
* create a feature reader of the specified type
* @param targetClass the target codec type
* @param inputFile the input file to create the track from (of the codec type)
* @return the FeatureReader instance
*/
public FeatureReader createFeatureReader(Class targetClass, File inputFile) {
FeatureReader reader = null;
try {
// check to see if the input file has an index
if (requireIndex(inputFile)) {
@ -107,8 +131,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
} catch (IOException e) {
throw new StingException("Unable to make the index file for " + inputFile, e);
}
// return a feature reader track
return new FeatureReaderTrack(targetClass, name, inputFile, reader);
return reader;
}
/**
@ -120,13 +143,13 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
*/
public static LinearIndex createIndex(File inputFile, FeatureCodec codec) throws IOException {
LinearIndexCreator create = new LinearIndexCreator(inputFile, codec);
create.setDisplayProgress(false); // don't display progress indicators
// if we can write the index, we should, but if not just create it in memory
if (new File(inputFile.getAbsoluteFile() + linearIndexExtension).canWrite())
File indexFile = new File(inputFile.getAbsoluteFile() + linearIndexExtension);
if (indexFile.getParentFile().canWrite() && (!indexFile.exists() || indexFile.canWrite()))
return create.createIndex();
else {
logger.info("Unable to write to location " + inputFile.getAbsoluteFile() + linearIndexExtension + " for index file, creating index in memory only");
logger.info("Unable to write to location " + indexFile + " for index file, creating index in memory only");
return create.createIndex(null);
}

View File

@ -0,0 +1,81 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.FeatureIterator;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* DbSNPWindowCounter
*
* Count the number of upstream and downstream dbSNP entries from the current position using the specified window size.
* (really the window size upstream and downstream, so windowSize * 2)
*
* @Author Aaron
* @Date May 7th, 2010
*/
@By(DataSource.REFERENCE)
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
// what we read in new tracks with
private FeatureReader reader;
@Argument(fullName = "dbSNPFile", shortName = "db", doc="The dbsnp file to search upstream and downstream for nearby snps", required = true)
private File myDbSNPFile;
@Argument(fullName = "dbSNPWindowSize", shortName = "dbw", doc="The distance to look both upstream and downstream for SNPs", required = true)
private int windowSize;
public void initialize() {
TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
reader = builder.createFeatureReader(DbSNPCodec.class,myDbSNPFile);
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
FeatureIterator<DbSNPFeature> dbSNPs;
// our upstream and downstream window locations
int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0);
int windowStop = (int)context.getLocation().getStop()+windowSize;
// query the dnSNP iterator
try {
dbSNPs = reader.query(context.getContig(),
windowStart,
windowStop);
} catch (IOException e) {
throw new StingException("Unable to query dbSNP track due to IO Exception",e);
}
// count the number of dbSNPs we've seen
int counter = 0;
for (DbSNPFeature feature: dbSNPs)
counter++;
out.println(context.getContig() + ":" + windowStart + "-" + context.getContig() + ":" + windowStop + "=" +
counter + " (dnSNP records)");
return 1;
}
public Long reduceInit() { return 0l; }
public Long reduce(Integer value, Long sum) {
return value + sum;
}
}

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import org.broad.tribble.vcf.VCFCodec;
import org.broadinstitute.sting.BaseTest;
import org.junit.Assert;

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="77" status="integration" publication="20100504124200" />
</ivy-module>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="79" status="integration" publication="20100507124200" />
</ivy-module>