ReplaceReadGroups. Fixes BAM files without read group info. MissingReadGroup points people to this tool now. Please point users on the forum to this tool now. Will migrate to Picard.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5284 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-02-21 14:02:41 +00:00
parent aa4a4e515d
commit 1a5d296737
7 changed files with 105 additions and 10 deletions

View File

@ -367,6 +367,15 @@
</manifest>
</jar>
<!-- Standalone jar for the read-group replacement tool. The entry point must be
     ReplaceReadGroups, the class in playground.tools that defines main(); the jar
     keeps the ModifyReadGroup.jar file name because the manifest-update step in
     this build file refers to the jar by that name. -->
<jar jarfile="${dist.dir}/ModifyReadGroup.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.playground.tools.ReplaceReadGroups" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
@ -450,6 +459,12 @@
</manifest>
</jar>
<!-- Update the already-built ModifyReadGroup.jar in place (update="true"),
     adding a Class-Path manifest entry taken from the jar.classpath property. -->
<jar jarfile="${dist.dir}/ModifyReadGroup.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">

View File

@ -345,10 +345,10 @@ public class GATKRunReport {
// Create an S3Object based on a file, with Content-Length set automatically and
// Content-Type set based on the file's extension (using the Mimetypes utility class)
S3Object fileObject = new S3Object(localFile);
logger.info("Created S3Object" + fileObject);
logger.info("Uploading " + localFile + " to AWS bucket");
//logger.info("Created S3Object" + fileObject);
//logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.info("Uploaded: " + s3Object);
logger.info("Uploaded to AWS: " + s3Object);
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {

View File

@ -61,10 +61,12 @@ public class CompareBAMAlignments extends CommandLineProgram {
if ( ! read1.getReadName().equals(read.getReadName()) )
bad(read1, read, "Names not equal");
if ( read1.getAlignmentStart() != read.getAlignmentStart() )
bad(read1, read, "Alignment starts not equal");
if ( ! read1.getCigarString().equals(read.getCigarString()) )
bad(read1, read, "Unequal CIGAR strings");
else {
if ( read1.getAlignmentStart() != read.getAlignmentStart() )
bad(read1, read, "Alignment starts not equal");
if ( ! read1.getCigarString().equals(read.getCigarString()) )
bad(read1, read, "Unequal CIGAR strings");
}
}
}
counter++;
@ -79,8 +81,8 @@ public class CompareBAMAlignments extends CommandLineProgram {
private void bad(SAMRecord read1, SAMRecord read2, String msg) {
System.out.printf("%nBAD: %s%n", msg);
System.out.printf(" read1: %s %s %s%n", read1.getReadName(), read1.getAlignmentStart(), read1.getCigarString());
System.out.printf(" read2: %s %s %s%n", read2.getReadName(), read2.getAlignmentStart(), read2.getCigarString());
System.out.printf(" read1: %s %s %s %s%n", read1.getReadName(), read1.getAlignmentStart(), read1.getCigarString(), read1.getInferredInsertSize());
System.out.printf(" read2: %s %s %s %s%n", read2.getReadName(), read2.getAlignmentStart(), read2.getCigarString(), read2.getInferredInsertSize());
// System.exit(1);
}

View File

@ -0,0 +1,78 @@
package org.broadinstitute.sting.playground.tools;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.Usage;
import net.sf.samtools.*;
import java.io.File;
import java.util.Arrays;
/**
* User: mdepristo
*
* Replaces read groups in a BAM file
*/
public class ReplaceReadGroups extends CommandLineProgram {
@Usage(programVersion="1.0") public String USAGE = "Creates a new read group, and assigns all reads from the I BAM file to this read group in the O BAM";
@Option(shortName="I", doc="Input file (bam or sam).", optional=false)
public File IN = null;
@Option(shortName="O",doc="Output file (bam or sam).", optional=false)
public File OUT = null;
@Option(shortName="ID",doc="Read Group ID", optional=false)
public String RGID = null;
@Option(shortName="LB",doc="Read Group Library", optional=false)
public String RGLB = null;
@Option(shortName="PL",doc="Read Group platform", optional=false)
public String RGPL = null;
@Option(shortName="SM",doc="Read Group sample", optional=false)
public String RGSM = null;
private static final String RGFIELD = "RG"; // todo -- use binary tag that's private in picard
// todo -- is it worth supporting these fields?
// CN Name of sequencing center producing the read.
// DS Description.
// DT Date the run was produced (ISO8601 date or date/time).
// PU Platform unit (e.g. flowcell-barcode.lane for Illumina or slide for SOLiD). Unique identi er.
/** Required main method implementation. */
public static void main(final String[] argv) {
System.exit(new ReplaceReadGroups().instanceMain(argv));
}
protected int doWork() {
SAMFileReader inReader = new SAMFileReader(IN);
// create the read group we'll be using
SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
rg.setLibrary(RGLB);
rg.setPlatform(RGPL);
rg.setSample(RGSM);
System.out.printf("Created read group ID=%s PL=%s LB=%s SM=%s%n", rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample());
// create the new header and output file
SAMFileHeader outHeader = inReader.getFileHeader().clone();
outHeader.setReadGroups(Arrays.asList(rg));
SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, true, OUT) ;
//
// write the reads in contig order
//
for ( SAMRecord read : inReader ) {
read.setAttribute(RGFIELD, rg.getId());
outWriter.addAlignment(read);
}
// cleanup
inReader.close();
outWriter.close();
return 0;
}
}

View File

@ -124,7 +124,7 @@ public class UserException extends ReviewedStingException {
public static class ReadMissingReadGroup extends MalformedBam {
public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK", read.getReadName()));
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
}
}

BIN
testdata/exampleNORG.bam vendored 100644

Binary file not shown.

BIN
testdata/exampleNORG.bam.bai vendored 100644

Binary file not shown.