gatk-3.8/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DbSNPWindowCounter.java

86 lines
3.4 KiB
Java

package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.util.CloseableIterator;
import org.broad.tribble.dbsnp.DbSNPCodec;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
/**
* DbSNPWindowCounter
*
* Count the number of upstream and downstream dbSNP entries from the current position using the specified window size.
* (really the window size upstream and downstream, so windowSize * 2)
*
* @Author Aaron
* @Date May 7th, 2010
*/
@By(DataSource.REFERENCE)
@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
public class DbSNPWindowCounter extends LocusWalker<Integer, Long> {
// what we read in new tracks with
private RMDTrack track;
@Output
private PrintStream out;
@Argument(fullName = "dbSNPFile", shortName = "db", doc="The dbsnp file to search upstream and downstream for nearby snps", required = true)
private File myDbSNPFile;
@Argument(fullName = "dbSNPWindowSize", shortName = "dbw", doc="The distance to look both upstream and downstream for SNPs", required = true)
private int windowSize;
public void initialize() {
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
getToolkit().getArguments().unsafe);
track = builder.createInstanceOfTrack(DbSNPCodec.class,myDbSNPFile);
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
CloseableIterator<GATKFeature> dbSNPs;
// our upstream and downstream window locations
int windowStart = (int)Math.max(context.getLocation().getStart()-windowSize,0);
int windowStop = (int)context.getLocation().getStop()+windowSize;
// query the dnSNP iterator
try {
dbSNPs = track.query(getToolkit().getGenomeLocParser().createGenomeLoc(context.getContig(),windowStart,windowStop));
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(myDbSNPFile, e);
}
// count the number of dbSNPs we've seen
int counter = 0;
while(dbSNPs.hasNext())
counter++;
out.println(context.getContig() + ":" + windowStart + "-" + context.getContig() + ":" + windowStop + "=" +
counter + " (dbSNP records)");
return 1;
}
public Long reduceInit() { return 0l; }
public Long reduce(Integer value, Long sum) {
return value + sum;
}
}