From ebec0ec1719fbf82f1b9c94fd857ed76b778ea1f Mon Sep 17 00:00:00 2001 From: asivache Date: Wed, 12 Aug 2009 20:24:37 +0000 Subject: [PATCH] A standalone companion to BamToFastqWalker: does the same thing but without calling in gatk's heavy artillery (does not "require" a reference either). Extracts seqs and quals and places them into fastq; along the way it also reverse complements reads that align to the negative strand (so that fastq contains reads as they come from the machine). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1419 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/playground/tools/BamToFastq.java | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java b/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java new file mode 100644 index 000000000..f1080c065 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java @@ -0,0 +1,81 @@ +package org.broadinstitute.sting.playground.tools; + +import net.sf.picard.cmdline.CommandLineProgram; +import net.sf.picard.cmdline.Usage; +import net.sf.picard.cmdline.Option; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; + +import java.io.*; + +import org.broadinstitute.sting.utils.BaseUtils; + +/** + * Created by IntelliJ IDEA. + * User: asivache + * Date: Aug 12, 2009 + * Time: 3:24:46 PM + * To change this template use File | Settings | File Templates. + */ +public class BamToFastq extends CommandLineProgram { + @Usage(programVersion="1.0") public String USAGE = "Extracts read sequences and qualities from the input sam/bam file and wirtes them into "+ + "the output file in fastq format. In the RC mode (default is True), if the read is aligned and the alignment is to the reverse strand on the genome, "+ + "the read's sequence from input sam file will be reverse-complemented prior to writing it to fastq in order restore correctly "+ + "the original read sequence as it was generated by the sequencer."; + @Option(shortName="I", doc="Input file (bam or sam) to extract reads from. If not specified, reads from stdin.", + optional=true) public File IN = null; + @Option(shortName="O",doc="Output file (fastq). If not specified, output is printed to stdout.", + optional=true) public File OUT = null; + @Option(shortName="RC", doc="re-reverse bases and quals of reads aligned to the negative strand before writing them to fastq", optional=true) + public Boolean RE_REVERSE = true; + + public static void main(final String[] argv) { + System.exit(new BamToFastq().instanceMain(argv)); + } + + protected int doWork() { + + + InputStream ins = null; + if ( IN == null ) ins = System.in; + else { + try { + ins = new FileInputStream(IN); + } catch ( FileNotFoundException ie ) { + System.out.println("Failed to open input file "+IN+": "+ie.getCause()); + return 1; + } + } + + SAMFileReader inReader = new SAMFileReader(ins); + PrintStream out = null; + if ( OUT == null ) out = System.out; + else { + try { + out = new PrintStream(OUT); + } catch ( FileNotFoundException ie ) { + System.out.println("Failed to open output file "+OUT+": "+ie.getCause()); + return 1; + } + } + + for (SAMRecord read : inReader ) { + out.println("@" + read.getReadName()); + if ( read.getReadUnmappedFlag() || !RE_REVERSE || !read.getReadNegativeStrandFlag() ) { + out.println(read.getReadString()); + out.println('+'); + out.println(read.getBaseQualityString()); + } else { + out.println(BaseUtils.simpleReverseComplement(read.getReadString())); + out.println('+'); + out.println(BaseUtils.reverse(read.getBaseQualityString())); + } + } + inReader.close(); + out.close(); + + return 0; + } + + +}