From 13fd182b7c46bc6cdb5ecb60cfb0ea1c66b57435 Mon Sep 17 00:00:00 2001
From: kiran
Date: Mon, 10 May 2010 15:17:05 +0000
Subject: [PATCH] For dealing with slightly malformatted BAMs - mark every alignment as primary, or in the case of some BAM files from UWash, supply the sample information for each read group.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3340 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../tools/MarkAllAlignmentsAsPrimary.java     | 55 ++++++++++++++++
 .../tools/RepairSeattleBAM.java               | 60 +++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/tools/MarkAllAlignmentsAsPrimary.java
 create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/tools/RepairSeattleBAM.java

diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/tools/MarkAllAlignmentsAsPrimary.java b/java/src/org/broadinstitute/sting/oneoffprojects/tools/MarkAllAlignmentsAsPrimary.java
new file mode 100755
index 000000000..6e7c37174
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/tools/MarkAllAlignmentsAsPrimary.java
@@ -0,0 +1,55 @@
+package org.broadinstitute.sting.oneoffprojects.tools;
+
+import net.sf.picard.cmdline.CommandLineProgram;
+import net.sf.picard.cmdline.Usage;
+import net.sf.picard.cmdline.Option;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMFileWriterFactory;
+
+import java.io.*;
+
+/**
+ * Copies a SAM/BAM file, clearing the "not primary alignment" flag on every record so that
+ * each alignment in the output is marked as primary.
+ */
+public class MarkAllAlignmentsAsPrimary extends CommandLineProgram {
+    @Usage(programVersion="1.0") public String USAGE = "Mark all alignments as primary.";
+    @Option(shortName="I", doc="Input file (bam or sam) to extract reads from. If not specified, reads from stdin.",
+            optional=true) public File IN = null;
+    @Option(shortName="O", doc="Output file (bam or sam).") public File OUT;
+
+    public static void main(final String[] argv) {
+        System.exit(new MarkAllAlignmentsAsPrimary().instanceMain(argv));
+    }
+
+    /**
+     * Streams every record from the input to the output, marking each one as primary.
+     *
+     * @return 0 on success
+     */
+    @Override
+    protected int doWork() {
+        // Honor the documented contract: fall back to stdin when no input file is given.
+        final SAMFileReader inReader = (IN != null) ? new SAMFileReader(IN) : new SAMFileReader(System.in);
+        try {
+            // The 'true' argument is the factory's presorted flag; the output reuses the input header.
+            final SAMFileWriter outWriter =
+                    new SAMFileWriterFactory().makeSAMOrBAMWriter(inReader.getFileHeader(), true, OUT);
+            try {
+                for (final SAMRecord read : inReader) {
+                    read.setNotPrimaryAlignmentFlag(false);
+                    outWriter.addAlignment(read);
+                }
+            } finally {
+                // Always close the output, even if a record fails to parse or write.
+                outWriter.close();
+            }
+        } finally {
+            inReader.close();
+        }
+
+        return 0;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/tools/RepairSeattleBAM.java b/java/src/org/broadinstitute/sting/oneoffprojects/tools/RepairSeattleBAM.java
new file mode 100755
index 000000000..f0ee5b534
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/tools/RepairSeattleBAM.java
@@ -0,0 +1,60 @@
+package org.broadinstitute.sting.oneoffprojects.tools;
+
+import net.sf.picard.cmdline.CommandLineProgram;
+import net.sf.picard.cmdline.Usage;
+import net.sf.picard.cmdline.Option;
+import net.sf.samtools.*;
+
+import java.io.*;
+
+/**
+ * Copies a SAM/BAM file, setting the sample of every read group in the header to the
+ * given sample name and setting an "SM" attribute with that same name on every read.
+ */
+public class RepairSeattleBAM extends CommandLineProgram {
+    @Usage(programVersion="1.0") public String USAGE = "Fix read group info";
+    @Option(shortName="I", doc="Input file (bam or sam) to extract reads from. If not specified, reads from stdin.",
+            optional=true) public File IN = null;
+    @Option(shortName="O", doc="Output file (bam or sam).") public File OUT;
+    @Option(shortName="S", doc="Sample.") public String SAMPLE;
+
+    public static void main(final String[] argv) {
+        System.exit(new RepairSeattleBAM().instanceMain(argv));
+    }
+
+    /**
+     * Rewrites the header's read groups with the requested sample, then streams every
+     * record to the output.
+     *
+     * @return 0 on success
+     */
+    @Override
+    protected int doWork() {
+        // Honor the documented contract: fall back to stdin when no input file is given.
+        final SAMFileReader inReader = (IN != null) ? new SAMFileReader(IN) : new SAMFileReader(System.in);
+        try {
+            // Patch the header before creating the writer so the output header carries the sample.
+            final SAMFileHeader header = inReader.getFileHeader();
+            for (final SAMReadGroupRecord rg : header.getReadGroups()) {
+                rg.setSample(SAMPLE);
+            }
+
+            final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUT);
+            try {
+                for (final SAMRecord read : inReader) {
+                    // NOTE(review): the SAM spec appears to reserve the per-read SM tag for an integer
+                    // mapping quality; kept as-is for compatibility - confirm it is still needed.
+                    read.setAttribute("SM", SAMPLE);
+                    outWriter.addAlignment(read);
+                }
+            } finally {
+                // Always close the output, even if a record fails to parse or write.
+                outWriter.close();
+            }
+        } finally {
+            inReader.close();
+        }
+
+        return 0;
+    }
+}
\ No newline at end of file