package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Utils; import java.io.PrintStream; import java.io.FileNotFoundException; import java.io.File; import java.util.Random; /* * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ /** * This walker prints out the reads from the BAM files provided to the traversal engines. * It also supports the command line option '-outputBamFile filname', which outputs all the * reads to a specified BAM file * The walker now also optionally filters reads based on command line options. */ @Requires({DataSource.READS, DataSource.REFERENCE}) public class PrintReadsWalker extends ReadWalker { /** an optional argument to dump the reads out to a BAM file */ @Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false) String outputBamFile = null; @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false) Integer maxLength = null; @Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false) String platform = null; // E.g. ILLUMINA, 454 /** * The initialize function. */ public void initialize() { if ( platform != null ) platform = platform.toUpperCase(); } /** * The reads filter function. * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return true if the read passes the filter, false if it doesn't */ public boolean filter(char[] ref, SAMRecord read) { if ( maxLength != null && read.getReadLength() > maxLength ) return false; if ( platform != null ) { Object readGroupAttr = read.getAttribute("RG"); if ( readGroupAttr != null ) { SAMReadGroupRecord readGroup = getToolkit().getEngine().getSAMHeader().getReadGroup(readGroupAttr.toString()); if ( readGroup != null ) { Object readPlatformAttr = readGroup.getAttribute("PL"); if ( readPlatformAttr != null ) return readPlatformAttr.toString().toUpperCase().contains(platform); } } } return true; } /** * The reads map function. * @param ref the reference bases that correspond to our read, if a reference was provided * @param read the read itself, as a SAMRecord * @return the read itself */ public SAMRecord map( char[] ref, SAMRecord read ) { return read; } /** * reduceInit is called once before any calls to the map function. We use it here to setup the output * bam file, if it was specified on the command line * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise */ public SAMFileWriter reduceInit() { if (outputBamFile != null) { SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression()); } else { return null; } } /** * given a read and a output location, reduce by emitting the read * @param read the read itself * @param output the output source * @return the SAMFileWriter, so that the next reduce can emit to the same source */ public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) { if (output != null) { output.addAlignment(read); } else { out.println(read.format()); } return output; } /** * when we're done traversing, close the reads file * @param output the SAMFileWriter we've used in the reduce phase */ public void onTraversalDone( SAMFileWriter output ) { if (output != null) { output.close(); } } }