From 1a5d296737db518fb63f3257810a84eabd4722b2 Mon Sep 17 00:00:00 2001 From: depristo Date: Mon, 21 Feb 2011 14:02:41 +0000 Subject: [PATCH] ReplaceReadGroups. Fixes BAM files without read group info. MissingReadGroup points people to this tool now. Please point users on the forum to this tool now. Will migrate to Picard. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5284 348d0f76-0448-11de-a6fe-93d51630548a --- build.xml | 15 ++++ .../sting/gatk/phonehome/GATKRunReport.java | 6 +- .../tools/CompareBAMAlignments.java | 14 ++-- .../playground/tools/ReplaceReadGroups.java | 78 ++++++++++++++++++ .../sting/utils/exceptions/UserException.java | 2 +- testdata/exampleNORG.bam | Bin 0 -> 3586 bytes testdata/exampleNORG.bam.bai | Bin 0 -> 232 bytes 7 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/playground/tools/ReplaceReadGroups.java create mode 100644 testdata/exampleNORG.bam create mode 100644 testdata/exampleNORG.bam.bai diff --git a/build.xml b/build.xml index a85e16e4a..3f7297728 100644 --- a/build.xml +++ b/build.xml @@ -367,6 +367,15 @@ + + + + + + + + + @@ -450,6 +459,12 @@ + + + + + + diff --git a/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index 3a8c139fc..c8e25b01a 100644 --- a/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -345,10 +345,10 @@ public class GATKRunReport { // Create an S3Object based on a file, with Content-Length set automatically and // Content-Type set based on the file's extension (using the Mimetypes utility class) S3Object fileObject = new S3Object(localFile); - logger.info("Created S3Object" + fileObject); - logger.info("Uploading " + localFile + " to AWS bucket"); + //logger.info("Created S3Object" + fileObject); + //logger.info("Uploading " + localFile + " to AWS bucket"); S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); - logger.info("Uploaded: " + s3Object); + logger.info("Uploaded to AWS: " + s3Object); } catch ( S3ServiceException e ) { exceptDuringRunReport("S3 exception occurred", e); } catch ( NoSuchAlgorithmException e ) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/tools/CompareBAMAlignments.java b/java/src/org/broadinstitute/sting/oneoffprojects/tools/CompareBAMAlignments.java index bd1b88dff..464967312 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/tools/CompareBAMAlignments.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/tools/CompareBAMAlignments.java @@ -61,10 +61,12 @@ public class CompareBAMAlignments extends CommandLineProgram { if ( ! read1.getReadName().equals(read.getReadName()) ) bad(read1, read, "Names not equal"); - if ( read1.getAlignmentStart() != read.getAlignmentStart() ) - bad(read1, read, "Alignment starts not equal"); - if ( ! read1.getCigarString().equals(read.getCigarString()) ) - bad(read1, read, "Unequal CIGAR strings"); + else { + if ( read1.getAlignmentStart() != read.getAlignmentStart() ) + bad(read1, read, "Alignment starts not equal"); + if ( ! read1.getCigarString().equals(read.getCigarString()) ) + bad(read1, read, "Unequal CIGAR strings"); + } } } counter++; @@ -79,8 +81,8 @@ public class CompareBAMAlignments extends CommandLineProgram { private void bad(SAMRecord read1, SAMRecord read2, String msg) { System.out.printf("%nBAD: %s%n", msg); - System.out.printf(" read1: %s %s %s%n", read1.getReadName(), read1.getAlignmentStart(), read1.getCigarString()); - System.out.printf(" read2: %s %s %s%n", read2.getReadName(), read2.getAlignmentStart(), read2.getCigarString()); + System.out.printf(" read1: %s %s %s %s%n", read1.getReadName(), read1.getAlignmentStart(), read1.getCigarString(), read1.getInferredInsertSize()); + System.out.printf(" read2: %s %s %s %s%n", read2.getReadName(), read2.getAlignmentStart(), read2.getCigarString(), read2.getInferredInsertSize()); // System.exit(1); } diff --git a/java/src/org/broadinstitute/sting/playground/tools/ReplaceReadGroups.java b/java/src/org/broadinstitute/sting/playground/tools/ReplaceReadGroups.java new file mode 100644 index 000000000..efbd0cdb9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/tools/ReplaceReadGroups.java @@ -0,0 +1,78 @@ +package org.broadinstitute.sting.playground.tools; + +import net.sf.picard.cmdline.CommandLineProgram; +import net.sf.picard.cmdline.Option; +import net.sf.picard.cmdline.Usage; +import net.sf.samtools.*; + +import java.io.File; +import java.util.Arrays; + +/** + * User: mdepristo + * + * Replaces read groups in a BAM file + */ +public class ReplaceReadGroups extends CommandLineProgram { + @Usage(programVersion="1.0") public String USAGE = "Creates a new read group, and assigns all reads from the I BAM file to this read group in the O BAM"; + @Option(shortName="I", doc="Input file (bam or sam).", optional=false) + public File IN = null; + @Option(shortName="O",doc="Output file (bam or sam).", optional=false) + public File OUT = null; + + @Option(shortName="ID",doc="Read Group ID", optional=false) + public String RGID = null; + + @Option(shortName="LB",doc="Read Group Library", optional=false) + public String RGLB = null; + + @Option(shortName="PL",doc="Read Group platform", optional=false) + public String RGPL = null; + + @Option(shortName="SM",doc="Read Group sample", optional=false) + public String RGSM = null; + + private static final String RGFIELD = "RG"; // todo -- use binary tag that's private in picard + + // todo -- is it worth supporting these fields? + // CN Name of sequencing center producing the read. + // DS Description. + // DT Date the run was produced (ISO8601 date or date/time). + // PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD). Unique identi er. + + /** Required main method implementation. */ + public static void main(final String[] argv) { + System.exit(new ReplaceReadGroups().instanceMain(argv)); + } + + protected int doWork() { + SAMFileReader inReader = new SAMFileReader(IN); + + // create the read group we'll be using + SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID); + rg.setLibrary(RGLB); + rg.setPlatform(RGPL); + rg.setSample(RGSM); + System.out.printf("Created read group ID=%s PL=%s LB=%s SM=%s%n", rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample()); + + // create the new header and output file + SAMFileHeader outHeader = inReader.getFileHeader().clone(); + outHeader.setReadGroups(Arrays.asList(rg)); + SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, true, OUT) ; + + // + // write the reads in contig order + // + for ( SAMRecord read : inReader ) { + read.setAttribute(RGFIELD, rg.getId()); + outWriter.addAlignment(read); + } + + // cleanup + inReader.close(); + outWriter.close(); + return 0; + } +} + + diff --git a/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 86e3d479a..b8b64eb86 100755 --- a/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -124,7 +124,7 @@ public class UserException extends ReviewedStingException { public static class ReadMissingReadGroup extends MalformedBam { public ReadMissingReadGroup(SAMRecord read) { - super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK", read.getReadName())); + super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName())); } } diff --git a/testdata/exampleNORG.bam b/testdata/exampleNORG.bam new file mode 100644 index 0000000000000000000000000000000000000000..f59219fec6be8055292c7414758af8ff23fa9dc7 GIT binary patch literal 3586 zcmV+d4*l^TiwFb&00000{{{d;LjnNh4Xs#BZ{*l@Eg7ZyE|uQnBl-C8KBEOUlB)Er z3LFe+0>g2-$88G<93#O*-K|ar=x+5$JxPq%3CIu*;Q-kr%fJp`1BMqtU1 zrwd$@acxVw9N@Ob~>?!$wJ2i=ngQ*#qJze(?{ z2XA-ZNvC6jVZ3#?HQ7Awe(v5ZrpxmuFaP}X;%xiH`;Sg99&bPT@B%`7xxIVw_`MgW z7calM{p|Duv&(0vFXUz_H!tN_Zl21?{uq9r$sGVI>s!t5useS6;PY|(}px&EUqc*tAlAHLi=efoUM{Fk4? zlY98&^wW-T0yq8O)4u6Webb%a`LyqA9w$7?t*7UI(pzaX8mq9IQf>V6H&z-yY0S1C z!7dx`;FZRs#=ZI4|HA%PAAkJu)$PW|xPO6csVb6;`lmp}CYwD;3z7g-WoVq66CNAm#RZ0wYHe$>^xvW`ka~~`H9DlVIWO>Gy^F>G{Mnaq9NVf&O#sUZPFbMm;WC2Imm5hgW z$dDt#yEx&;uf#sXbIl1~e|{xjHb#j_7!CVK~l z4K6txCL+BoPM6AYEqB&T6Oa$Ph{7*rRqhYD4T@ z+eSU)JFWhxO|b=?kJsp^Pi)!gNWRwZwz}A49 zhihWJ?P}g50tlpU%2PXmcC_fZmIy5oj0Ep5skyZe7d3aCqtC+HH*T-F-Rb1;V7fOs zUT{JJWI&=aCQx3gfaXX_q9a1%7$>9zP_fdH5>Wm#wvvVj45LwK8h6di+m0iZ5ACHH zY|31~nXe=h9%ochhB;K93RI|C_CPGb^Vusr*BM%SeLQhVP^7Y?OiGM(NmHdOOh~Lr zMUt}ADAFa4Y%h>9bb4NgbC(kyl2D3}OPfl|Fa&Q4OTavmQgY=wK8Mx;d^Ts?G3XTW z;gkx3<)q!I3~B%Fjr<(I+TY%;6AYgH0~o7Ei-Id~UX=w&QHFDxBN(KRo3enUAYE1j zl}TPv1rNE-3dBC*HPI<)!2y0W3|a*#pnydgKqhWxE|7xrP`QF}CLJb}@3cK^F9CFL z1?W0Koi_mVn~@_FmxxfJa9$9|R7nZYohMxEShmZ%jM)yx)ZtFcvISub`z;Sh&in`p z!zq9d018v8?10xiuUJnSZ4~q|WOE6ihyS_&bUlT>3u~9RSKRS*@4$@JgT+`Sy3$2e z0jNBRimD)G1r-Plf~qoyf+H1%T`jg+no3C^>iXL}Yvu8eywrBaPmNudDaxB^sFlx0x`r9)9Jq}^iFVvZ0lm9F3cMgYqbB4ES{ z#F}uurVEM{=BtqFjM|U+0}bc$5Ve z6YI<}>SzgX|NJM0x9{D^(XU|bJGaxq@pNK}ZhVy?T4lM38Wc8F6;e@-6PiO?K_^8h zD`^oG*{pvQH}i7VIq)H5Qh2PEsg5Z<7KUQR4vYts37h(YO9wj379Jvj7xWS1%jVFp zuJBxE=;yDG=l_LKnaov5r3TOz9`LSf$UtaIQP1^ECxMp}$YV3Ode&Q@=t5{SF$#>g zsKM8LBsI{VzN?28c)tCMPv+-mu=Y2%lfvHgcxOC4I@(>}QBi`xEmSd58Lntm6b0}B zs8v`glcIpBTWVj-8rm2fZVvT6@A8|U6frbVU#Z0m#%w_wX)47s@9?uMr~sfj-~a)RRe+2!!oY(u3QrxX%*@fi^bt@&xDHqQ zb!s51N;)QS4Ap$MjI_V`Uxu{r-Izi@fVIEB70)r$+u`BPbh5t?K2?%~U&ykgz-0*& zsDi!7sVWLcQeIFEK1tKsrPgf&brnG1k`o5%h5QNec9v2>20_X=oc^(xQtb)dQZf0#vHI37R&c&MsmLs8Q|Bxv# zq#O)3NIR%I5Nognvuw$Fpc;1zq{oEM;R+u1T@GKvWp3z~SBS2A==*PiNE6TjB}opkS9En*QMmAbR&kj{XVOPHtyyLkz}-?_X&jkk%vvvZGm< zs5DA}A#f4Jx}b3hiP@-9$R84vL##d*12xw@<|{K&4X7M|K%`)gdVq1=tDOXev%wJX znwKj9$O_*UZPs3I-_=GxeI@!CqU#(@-vkj*Yo<4nD9y600xzUV0n-OaFUm-aQCU#1 zu+gA5XszPj0QWlGPQSTodA)vf=(ibZl2y+SMLlps>LXwH#v!e`+=FU!y^f%Sd7&ah z$LKt`ZV7Y$@Q+p+V)*V2ru!dQ`|I1Q4zv$2_hf%DSxZe44Bi_YT|`J#U>X-$oMmNZ z-UKO{#kfof$ZrNjAPBZO1s`s^F1Ik`L(rfZc${*MJ!d$e;CyJu3qkTwY7J?pV|QHC zu>;#4kyYEW?bfnMWMUU}hOUY3hp&(4m7r5lao``~EKw?j;iM}daKnH)g1}t6Nd

nt8-$+pZQl0QhU+1u?hZU3 z&13(^4^|qyaWg-^eX-J5yUXD1gTFgoyu>0Y44#xCg5pS}C{`pdQ9@BsmY4!rY$OEJ z+|U-Q4Sj@Ah-)$p%`pxK~o&3QKII`Mb*bEkpm=FuPbiXDyMclvi9Qj z1@*fi^Nzus;`2XUrsMwYFBjEzot@6tRvLeNJDj8G(eA-?Z*Ng;G4xD?u`vh=rDYBi z3EHM4G={Gcv{6;$RoM|HJoEaxeko<3gb=fkHNdb2qvcGsEtDOkX=dmYT;OqA%@FD( zcuefS**ym1YvQT0l7Jee83nst&;$Xwl@(1>nt}S!poMTz7Vf;TYN=VvX1Pe#Os&n% z))G`0>8fXcFw3etbE`tu-|oQkf8tcA^kigYU|?VaVm~1U1`yj@97uox6Ca53go;05f{6YEDFy;Bs5p#&mm4DP4Hbv+ uj|xD