From ee2abd30c477e662e33860d0fdcef1effdec3398 Mon Sep 17 00:00:00 2001 From: hanna Date: Mon, 23 Nov 2009 16:37:59 +0000 Subject: [PATCH] Count the best alignments and emit them to a file. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2118 348d0f76-0448-11de-a6fe-93d51630548a --- .../alignment/CountBestAlignmentsWalker.java | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java diff --git a/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java new file mode 100644 index 000000000..3fdd46952 --- /dev/null +++ b/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java @@ -0,0 +1,90 @@ +package org.broadinstitute.sting.alignment; + +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.alignment.bwa.BWTFiles; +import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; +import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import net.sf.samtools.SAMRecord; + +import java.util.*; + +/** + * Counts the number of best alignments + * + * @author mhanna + * @version 0.1 + */ +public class CountBestAlignmentsWalker extends ReadWalker { + /** + * The supporting BWT index generated using BWT. + */ + @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) + String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; + + /** + * The actual aligner. + */ + private Aligner aligner = null; + + private SortedMap alignmentFrequencies = new TreeMap(); + + /** + * Create an aligner object. The aligner object will load and hold the BWT until close() is called. + */ + @Override + public void initialize() { + BWTFiles bwtFiles = new BWTFiles(prefix); + BWAConfiguration configuration = new BWAConfiguration(); + aligner = new BWACAligner(bwtFiles,configuration); + } + + /** + * Aligns a read to the given reference. + * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. + * @param read Read to align. + * @return Number of alignments found for this read. + */ + @Override + public Integer map(char[] ref, SAMRecord read) { + Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); + if(alignmentIterator.hasNext()) { + int numAlignments = alignmentIterator.next().length; + if(alignmentFrequencies.containsKey(numAlignments)) + alignmentFrequencies.put(numAlignments,alignmentFrequencies.get(numAlignments)+1); + else + alignmentFrequencies.put(numAlignments,1); + } + return 1; + } + + /** + * Initial value for reduce. In this case, validated reads will be counted. + * @return 0, indicating no reads yet validated. + */ + @Override + public Integer reduceInit() { return 0; } + + /** + * Calculates the number of reads processed. + * @param value Number of reads processed by this map. + * @param sum Number of reads processed before this map. + * @return Number of reads processed up to and including this map. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + /** + * Cleanup. + * @param result Number of reads processed. + */ + @Override + public void onTraversalDone(Integer result) { + aligner.close(); + for(Map.Entry alignmentFrequency: alignmentFrequencies.entrySet()) + out.printf("%d\t%d%n", alignmentFrequency.getKey(), alignmentFrequency.getValue()); + super.onTraversalDone(result); + } +}