package org.broadinstitute.sting.alignment; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import net.sf.samtools.*; import net.sf.picard.reference.ReferenceSequenceFileFactory; /** * Align reads to the reference specified by BWTPrefix. * * @author mhanna * @version 0.1 */ @WalkerName("Align") public class AlignmentWalker extends ReadWalker { @Argument(fullName="BWTPrefix",shortName="BWT",doc="Index files generated by bwa index -d bwtsw",required=false) String prefix = "/Users/mhanna/reference/Ecoli/Escherichia_coli_K12_MG1655.fasta"; @Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false) String outputBamFile = null; @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) public Integer bamCompression = 5; /** * The actual aligner. */ private BWACAligner aligner = null; /** * Target for reads to output. */ private SAMFileWriter outputBam = null; /** * New header to use, if desired. */ private SAMFileHeader header = null; /** Must return true for reads that need to be processed. Reads, for which this method return false will * be skipped by the engine and never passed to the walker. */ @Override public boolean filter(char[] ref, SAMRecord read) { return true; } /** * Create an aligner object. The aligner object will load and hold the BWT until close() is called. */ @Override public void initialize() { BWTFiles bwtFiles = new BWTFiles(prefix); BWAConfiguration configuration = new BWAConfiguration(); aligner = new BWACAligner(bwtFiles,configuration); // HACK: If the sequence dictionary in the existing header is null, stuff the contents of the current reference // into it, so that the sequence has something to which to back-align. SAMFileHeader originalHeader = getToolkit().getSAMFileHeader(); if(originalHeader.getSequenceDictionary().isEmpty()) { header = originalHeader.clone(); SAMSequenceDictionary referenceDictionary = ReferenceSequenceFileFactory.getReferenceSequenceFile(getToolkit().getArguments().referenceFile).getSequenceDictionary(); header.setSequenceDictionary(referenceDictionary); } else header = originalHeader; if ( outputBamFile != null ) { // Stuff the header from the fasta into that of the sequence dictionary. outputBam = Utils.createSAMFileWriterWithCompression(header, false, outputBamFile, bamCompression); } } /** * Aligns a read to the given reference. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param read Read to align. * @return Number of alignments found for this read. */ @Override public Integer map(char[] ref, SAMRecord read) { SAMRecord alignedRead = aligner.align(read,header); if (outputBam != null) { outputBam.addAlignment(alignedRead); } else { out.println(alignedRead.format()); } return 1; } /** * Initial value for reduce. In this case, alignments will be counted. * @return 0, indicating no alignments yet found. */ @Override public Integer reduceInit() { return 0; } /** * Calculates the number of alignments found. * @param value Number of alignments found by this map. * @param sum Number of alignments found before this map. * @return Number of alignments found up to and including this map. */ @Override public Integer reduce(Integer value, Integer sum) { return value + sum; } /** * Cleanup. * @param result Number of reads processed. */ @Override public void onTraversalDone(Integer result) { aligner.close(); super.onTraversalDone(result); } }