Computes a vector of read counts over successive intervals of a specified length along the reference (e.g. the number of reads per 1 Mbase).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1115 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3bacb3db03
commit
ceeeec13b8
|
|
@ -0,0 +1,81 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
|
public class CoarseCoverageWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
|
@Argument(fullName="granularity",shortName="G",doc="Will print numbers of reads per every <granularity> bases "+
|
||||||
|
"on the reference, or on the subset of the reference specified by Intervals (if given). Moving to the next "+
|
||||||
|
"contig on the reference will always restart the count anew, even if the count of bases in the last chunk on"+
|
||||||
|
" the previous contig did not reach specified <granularity>.",required=true)
|
||||||
|
public Integer N;
|
||||||
|
|
||||||
|
|
||||||
|
private int chunkStart = 1; // start of the current chunk we are counting reads for
|
||||||
|
private int contig = 0; // current contig we are on
|
||||||
|
private int count = 0; // number of reads overlapping with the current chunk
|
||||||
|
private static String zeroString = "0";
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
chunkStart = 1;
|
||||||
|
contig = 0;
|
||||||
|
count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
|
|
||||||
|
if ( read.getReadUnmappedFlag() ||
|
||||||
|
read.getDuplicateReadFlag() ||
|
||||||
|
read.getNotPrimaryAlignmentFlag() ||
|
||||||
|
read.getMappingQuality() == 0 ) return 0;
|
||||||
|
|
||||||
|
if ( read.getReferenceIndex() != contig ) {
|
||||||
|
// we jumped onto another contig
|
||||||
|
out.printf("%d%n", count); // print old count
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
// if we skipped one or more contigs completely, make sure we print 0 counts over all of them:
|
||||||
|
for ( contig++ ; contig < read.getReferenceIndex() ; contig++) {
|
||||||
|
int contigSize = read.getHeader().getSequence(contig).getSequenceLength();
|
||||||
|
for ( int k = 1 ; k < contigSize ; k+=N ) out.println(zeroString);
|
||||||
|
}
|
||||||
|
// by now we scrolled to the right contig
|
||||||
|
|
||||||
|
chunkStart = 1; // reset chunk start
|
||||||
|
}
|
||||||
|
|
||||||
|
// if our read is past the boundary of the current chunk, print old count(s)
|
||||||
|
// (for the current chunk and all chunks we may have skipped altogether) and reinitialize:
|
||||||
|
while ( chunkStart+N < read.getAlignmentStart() ) {
|
||||||
|
out.printf("%d%n", count); // print old count
|
||||||
|
count = 0;
|
||||||
|
chunkStart += N;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return value+sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
out.printf("%d%n", count); // print count from the last chunk
|
||||||
|
super.onTraversalDone(result);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue