diff --git a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 4e81b8294..890b0830f 100755
--- a/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -118,6 +118,10 @@ public class GATKArgumentCollection {
     @Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
     public String DBSNPFile = null;
 
+    @Element(required = false)
+    @Argument(fullName = "simplifyBAM", shortName = "simplifyBAM", doc = "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", required = false)
+    public boolean simplifyBAM = false; // optional argument; simplification stays off (false) unless it is supplied
+
     /**
      * The override mechanism in the GATK, by default, populates the command-line arguments, then
      * the defaults from the walker annotations.  Unfortunately, walker annotations should be trumped
@@ -435,6 +439,9 @@ public class GATKArgumentCollection {
         if (enableLowMemorySharding != other.enableLowMemorySharding)
             return false;
 
+        if ( simplifyBAM != other.simplifyBAM ) // the comparison fails when the two objects disagree on simplifyBAM
+            return false;
+
         return true;
     }
 
diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
index 03a95c30b..a3da3156c 100644
--- a/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
+++ b/java/src/org/broadinstitute/sting/gatk/io/storage/SAMFileWriterStorage.java
@@ -36,6 +36,7 @@ import net.sf.samtools.util.RuntimeIOException;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.SimplifyingSAMFileWriter; // NOTE(review): no use of this type is visible in the hunks shown here - confirm where it is referenced
 
 /**
  * Provides temporary storage for SAMFileWriters.
@@ -81,6 +82,11 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage, StingSAMFileWrite
         return samFile;
     }
 
+    public boolean simplifyBAM() { // exposes the simplifyBAM flag read from the engine's argument collection
+        return engine.getArguments().simplifyBAM;
+    }
+
     public OutputStream getSAMOutputStream() {
         return samOutputStream;
     }
diff --git a/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java b/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java
new file mode 100644
index 000000000..df2010e8b
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/sam/SimplifyingSAMFileWriter.java
@@ -0,0 +1,62 @@
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; // NOTE(review): this import and the four after it (GenomeLoc, GenomeLocParser, UserException, Iterator) do not appear to be referenced by the class added in this file - confirm and drop
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * A {@link SAMFileWriter} wrapper that filters and slims down reads before
+ * passing them to the writer it wraps.
+ *
+ * A read is dropped when its unmapped, fails-vendor-quality-check, duplicate,
+ * or not-primary-alignment flag is set.  Every read that survives has all of
+ * its attributes cleared except the "RG" attribute.
+ */
+public class SimplifyingSAMFileWriter implements SAMFileWriter {
+    final SAMFileWriter dest;
+
+    /**
+     * @param finalDestination the writer that receives the simplified reads
+     */
+    public SimplifyingSAMFileWriter(final SAMFileWriter finalDestination) {
+        dest = finalDestination;
+    }
+
+    /**
+     * Writes the read to the wrapped writer after simplifying it, or silently
+     * drops it when {@link #excludeRead(SAMRecord)} rejects it.
+     *
+     * @param read the read to filter, simplify (in place) and forward
+     */
+    public void addAlignment( SAMRecord read ) {
+        if ( excludeRead(read) )
+            return;
+
+        final SAMRecord simplified = simplifyRead(read);
+        dest.addAlignment(simplified);
+    }
+
+    /**
+     * Retrieves the header of the wrapped writer.
+     * @return the header reported by the wrapped writer.
+     */
+    public SAMFileHeader getFileHeader() {
+        final SAMFileHeader header = dest.getFileHeader();
+        return header;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    public void close() {
+        this.dest.close();
+    }
+
+    /**
+     * @param read the read to test
+     * @return true exactly when {@link #excludeRead(SAMRecord)} returns false
+     */
+    public static final boolean keepRead(SAMRecord read) {
+        final boolean excluded = excludeRead(read);
+        return ! excluded;
+    }
+
+    /**
+     * @param read the read to test
+     * @return true if the read is flagged as unmapped, failing the vendor
+     *         quality check, a duplicate, or not the primary alignment
+     */
+    public static final boolean excludeRead(SAMRecord read) {
+        // flags are checked in this fixed order, stopping at the first one that is set
+        if ( read.getReadUnmappedFlag() )
+            return true;
+        if ( read.getReadFailsVendorQualityCheckFlag() )
+            return true;
+        if ( read.getDuplicateReadFlag() )
+            return true;
+        return read.getNotPrimaryAlignmentFlag();
+    }
+
+    /**
+     * Strips every attribute from the read except "RG".  The read is modified
+     * in place and the same object is returned.
+     *
+     * @param read the read to simplify
+     * @return the same read instance, now carrying only its "RG" attribute
+     */
+    public static final SAMRecord simplifyRead(SAMRecord read) {
+        // remember the read group, wipe everything, then put the read group back
+        // NOTE(review): when the read has no RG attribute a null value is passed to
+        // setAttribute -- confirm the library treats that as "attribute absent"
+        final Object readGroup = read.getAttribute("RG");
+        read.clearAttributes();
+        read.setAttribute("RG", readGroup);
+        return read;
+    }
+}