2009-03-16 06:42:24 +08:00
|
|
|
package org.broadinstitute.sting.gatk.walkers;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Locale;
import java.util.Random;
|
|
|
|
|
|
2009-06-09 22:28:32 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2009 The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* This walker prints out the reads from the BAM files provided to the traversal engines.
|
|
|
|
|
* It also supports the command line option '-outputBamFile filename', which outputs all the
|
|
|
|
|
* reads to a specified BAM file.
|
2009-06-16 00:38:08 +08:00
|
|
|
* The walker now also optionally filters reads based on command line options.
|
2009-06-09 22:28:32 +08:00
|
|
|
*/
|
2009-06-19 05:03:57 +08:00
|
|
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
2009-05-22 02:30:45 +08:00
|
|
|
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|
|
|
|
|
2009-06-09 22:28:32 +08:00
|
|
|
/** an optional argument to dump the reads out to a BAM file */
|
|
|
|
|
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
2009-07-30 00:11:45 +08:00
|
|
|
SAMFileWriter outputBamFile = null;
|
2009-06-16 00:38:08 +08:00
|
|
|
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
|
|
|
|
|
Integer maxLength = null;
|
2009-06-16 03:19:50 +08:00
|
|
|
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
|
|
|
|
|
String platform = null;
|
|
|
|
|
// E.g. ILLUMINA, 454
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The initialize function.
|
|
|
|
|
*/
|
|
|
|
|
public void initialize() {
|
|
|
|
|
if ( platform != null )
|
|
|
|
|
platform = platform.toUpperCase();
|
|
|
|
|
}
|
2009-06-16 00:38:08 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* The reads filter function.
|
|
|
|
|
* @param ref the reference bases that correspond to our read, if a reference was provided
|
|
|
|
|
* @param read the read itself, as a SAMRecord
|
|
|
|
|
* @return true if the read passes the filter, false if it doesn't
|
|
|
|
|
*/
|
|
|
|
|
public boolean filter(char[] ref, SAMRecord read) {
|
2009-06-16 03:19:50 +08:00
|
|
|
if ( maxLength != null && read.getReadLength() > maxLength )
|
|
|
|
|
return false;
|
|
|
|
|
if ( platform != null ) {
|
|
|
|
|
Object readGroupAttr = read.getAttribute("RG");
|
|
|
|
|
if ( readGroupAttr != null ) {
|
2009-07-30 07:00:15 +08:00
|
|
|
SAMReadGroupRecord readGroup = getToolkit().getSAMFileHeader().getReadGroup(readGroupAttr.toString());
|
2009-06-16 03:19:50 +08:00
|
|
|
if ( readGroup != null ) {
|
|
|
|
|
Object readPlatformAttr = readGroup.getAttribute("PL");
|
|
|
|
|
if ( readPlatformAttr != null )
|
|
|
|
|
return readPlatformAttr.toString().toUpperCase().contains(platform);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true;
|
2009-06-16 00:38:08 +08:00
|
|
|
}
|
2009-05-22 02:30:45 +08:00
|
|
|
|
2009-06-09 22:28:32 +08:00
|
|
|
/**
|
|
|
|
|
* The reads map function.
|
|
|
|
|
* @param ref the reference bases that correspond to our read, if a reference was provided
|
|
|
|
|
* @param read the read itself, as a SAMRecord
|
|
|
|
|
* @return the read itself
|
|
|
|
|
*/
|
|
|
|
|
public SAMRecord map( char[] ref, SAMRecord read ) {
|
2009-05-22 02:30:45 +08:00
|
|
|
return read;
|
2009-03-16 06:42:24 +08:00
|
|
|
}
|
|
|
|
|
|
2009-06-09 22:28:32 +08:00
|
|
|
/**
|
|
|
|
|
* reduceInit is called once before any calls to the map function. We use it here to setup the output
|
|
|
|
|
* bam file, if it was specified on the command line
|
|
|
|
|
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
|
|
|
|
*/
|
2009-05-22 02:30:45 +08:00
|
|
|
public SAMFileWriter reduceInit() {
|
2009-07-30 00:11:45 +08:00
|
|
|
return outputBamFile;
|
2009-05-22 02:30:45 +08:00
|
|
|
}
|
|
|
|
|
|
2009-06-09 22:28:32 +08:00
|
|
|
/**
|
|
|
|
|
* given a read and a output location, reduce by emitting the read
|
|
|
|
|
* @param read the read itself
|
|
|
|
|
* @param output the output source
|
|
|
|
|
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
|
|
|
|
*/
|
|
|
|
|
public SAMFileWriter reduce( SAMRecord read, SAMFileWriter output ) {
|
|
|
|
|
if (output != null) {
|
2009-05-22 02:30:45 +08:00
|
|
|
output.addAlignment(read);
|
|
|
|
|
} else {
|
|
|
|
|
out.println(read.format());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return output;
|
|
|
|
|
}
|
2009-03-16 06:42:24 +08:00
|
|
|
|
2009-06-09 22:28:32 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* when we're done traversing, close the reads file
|
|
|
|
|
* @param output the SAMFileWriter we've used in the reduce phase
|
|
|
|
|
*/
|
|
|
|
|
public void onTraversalDone( SAMFileWriter output ) {
|
|
|
|
|
if (output != null) {
|
2009-05-22 02:30:45 +08:00
|
|
|
output.close();
|
|
|
|
|
}
|
2009-03-16 06:42:24 +08:00
|
|
|
}
|
|
|
|
|
}
|