diff --git a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java index b67b7738c..c87ae5828 100755 --- a/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/GATKArgumentCollection.java @@ -112,6 +112,10 @@ public class GATKArgumentCollection { @Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false) public Integer maximumReadSorts = null; + @Element(required=false) + @Argument(fullName = "bam_compression", shortName = "compress", doc = "Compression level to use for writing BAM files", required = false) + public Integer BAMcompression = null; + @Element(required=false) @Argument(fullName = "filterZeroMappingQualityReads", shortName = "fmq0", doc = "If true, mapping quality zero reads will be filtered at the lowest GATK level. Vastly improves performance at areas with abnormal depth due to mapping Q0 reads", required = false) public Boolean filterZeroMappingQualityReads = false; @@ -245,6 +249,10 @@ public class GATKArgumentCollection { (other.maximumReadSorts != null && !other.maximumReadSorts.equals(this.maximumReadSorts))) { return false; } + if ((other.BAMcompression == null && this.BAMcompression != null) || + (other.BAMcompression != null && !other.BAMcompression.equals(this.BAMcompression))) { + return false; + } if ((other.filterZeroMappingQualityReads == null && this.filterZeroMappingQualityReads != null) || (other.filterZeroMappingQualityReads != null && !other.filterZeroMappingQualityReads.equals(this.filterZeroMappingQualityReads))) { return false; diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 47e985ae2..6179d3297 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -280,6 +280,17 @@ public class GenomeAnalysisEngine { return strictness; } + /** + * Default to 5 (based on research by Alec Wysoker) + * + * @return the BAM compression + */ + public int getBAMCompression() { + return (argCollection.BAMcompression == null || + argCollection.BAMcompression < 1 || + argCollection.BAMcompression > 8) ? 5 : argCollection.BAMcompression; + } + /** * Convenience function that binds RODs using the old-style command line parser to the new style list for * a uniform processing. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index dae6c404b..e569552da 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -2,9 +2,9 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMFileWriterFactory; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.Utils; import java.io.PrintStream; import java.io.FileNotFoundException; @@ -22,9 +22,8 @@ public class PrintReadsWalker extends ReadWalker { public SAMFileWriter reduceInit() { if ( outputBamFile != null ) { // ! outputBamFile.equals("") ) { - SAMFileWriterFactory fact = new SAMFileWriterFactory(); SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); - return fact.makeBAMWriter(header, true, new File(outputBamFile)); + return Utils.createSAMFileWriterWithCompression(header, true, outputBamFile, getToolkit().getBAMCompression()); } else { return null; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java index 15ba86991..0fcfa7980 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java @@ -4,9 +4,9 @@ import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.Utils; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMFileWriterFactory; import net.sf.samtools.SAMFileHeader; import net.sf.picard.reference.ReferenceSequence; @@ -48,11 +48,10 @@ public class IOCrusherWalker extends ReadWalker reduceInit() { - SAMFileWriterFactory fact = new SAMFileWriterFactory(); ArrayList outputs = new ArrayList(nWaysOut); for ( int i = 0; i < nWaysOut; i++ ) { - SAMFileHeader header = this.getToolkit().getSamReader().getFileHeader(); - outputs.add(fact.makeBAMWriter(header, true, new File(outputBase + "." + i + ".bam"))); + SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); + outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", getToolkit().getBAMCompression())); } return outputs; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java index ac2d091da..2c5d2aae3 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java @@ -149,9 +149,8 @@ public class LogisticRecalibrationWalker extends ReadWalker { @Argument(fullName="output_file", shortName="O",doc="SAM or BAM file to write filtered reads into (will be overwritten if exists)",required=true ) public String output; @@ -18,8 +18,8 @@ public class ReadFilterWalker extends ReadWalker { private SAMFileWriter writer = null; public void initialize() { - SAMFileHeader header = getToolkit().getSamReader().getFileHeader(); - writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, header.getSortOrder() != SAMFileHeader.SortOrder.unsorted, new File(output)); + SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); + writer = Utils.createSAMFileWriterWithCompression(header, header.getSortOrder() != SAMFileHeader.SortOrder.unsorted, output, getToolkit().getBAMCompression()); } @Override diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java index 7f8c368ac..17d96ed56 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReplaceQuals.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.Utils; import net.sf.samtools.*; import net.sf.picard.reference.ReferenceSequence; @@ -108,9 +109,8 @@ public class ReplaceQuals extends ReadWalker { public SAMFileWriter reduceInit() { if ( outputFilename != null ) { // ! outputBamFile.equals("") ) { - SAMFileWriterFactory fact = new SAMFileWriterFactory(); SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); - return fact.makeBAMWriter(header, true, new File(outputFilename)); + return Utils.createSAMFileWriterWithCompression(header, true, outputFilename, getToolkit().getBAMCompression()); } else { return null; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java index 50d8db818..d30f5753b 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/duplicates/CombineDuplicatesWalker.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.playground.gatk.walkers; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.duplicates.DupUtils; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -11,7 +12,6 @@ import java.io.File; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMFileWriter; -import net.sf.samtools.SAMFileWriterFactory; import net.sf.samtools.SAMFileHeader; /** @@ -49,9 +49,8 @@ public class CombineDuplicatesWalker extends DuplicateWalker throw new RuntimeException("LOD threshold cannot be a negative number"); SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); - writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, new File(OUT)); + writer = Utils.createSAMFileWriterWithCompression(header, false, OUT, getToolkit().getBAMCompression()); if ( OUT_INDELS != null ) { try { indelOutput = new FileWriter(new File(OUT_INDELS)); diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index c9d531180..6ed01b089 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.utils; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMSequenceRecord; -import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.*; import net.sf.samtools.util.StringUtil; import net.sf.picard.reference.ReferenceSequenceFile; @@ -45,6 +43,12 @@ public class Utils { throw new RuntimeException(msg); } + public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) { + if (file.endsWith(".bam")) + return new SAMFileWriterFactory().makeBAMWriter(header, presorted, new File(file), compression); + return new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, new File(file)); + } + /** * Returns a new list built from those objects found in collection that satisfy the * predicate ( i.e. pred.apply() is true for the objects in th eresulting list ).