diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 5d6fb75ed..228f8351a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -813,7 +813,8 @@ public class GenomeAnalysisEngine { getWalkerBAQQualityMode(), refReader, getBaseRecalibration(), - argCollection.defaultBaseQualities); + argCollection.defaultBaseQualities, + argCollection.removeProgramRecords); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 3fd3857c5..91a008cbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -249,6 +249,9 @@ public class GATKArgumentCollection { @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; + @Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we remove program records from the SAM header", required = false) + public boolean removeProgramRecords = false; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 0fa4234b3..7f0a0c4c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -89,6 +89,11 @@ public class SAMDataSource { */ private final SAMFileReader.ValidationStringency validationStringency; + /** + * Do we want to remove the program records from this data source? + */ + private final boolean removeProgramRecords; + /** * Store BAM indices for each reader present. */ @@ -200,7 +205,8 @@ public class SAMDataSource { BAQ.QualityMode.DONT_MODIFY, null, // no BAQ null, // no BQSR - (byte) -1); + (byte) -1, + false); } /** @@ -233,7 +239,8 @@ public class SAMDataSource { BAQ.QualityMode qmode, IndexedFastaSequenceFile refReader, BaseRecalibration bqsrApplier, - byte defaultBaseQualities) { + byte defaultBaseQualities, + boolean removeProgramRecords) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -249,6 +256,7 @@ public class SAMDataSource { dispatcher = null; validationStringency = strictness; + this.removeProgramRecords = removeProgramRecords; if(readBufferSize != null) ReadShard.setReadBufferSize(readBufferSize); else { @@ -748,7 +756,7 @@ public class SAMDataSource { private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); - SAMReaders readers = new SAMReaders(readerIDs, validationStringency); + SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords); allResources.add(readers); availableResources.add(readers); } @@ -777,9 +785,11 @@ public class SAMDataSource { /** * Derive a new set of readers from the Reads metadata. * @param readerIDs reads to load. + * TODO: validationStringency is not used here * @param validationStringency validation stringency. + * @param removeProgramRecords indicate whether to clear program records from the readers */ - public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { + public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) { final int totalNumberOfFiles = readerIDs.size(); int readerNumber = 1; final SimpleTimer timer = new SimpleTimer().start(); @@ -790,6 +800,9 @@ public class SAMDataSource { long lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (removeProgramRecords) { + init.reader.getFileHeader().setProgramRecords(new ArrayList()); + } if (threadAllocation.getNumIOThreads() > 0) { inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 1c5dab254..f2c546317 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -24,9 +24,12 @@ package org.broadinstitute.sting.gatk.datasources.reads; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMProgramRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.Tags; @@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.AfterMethod; @@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest { fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception"); } } + + /** Test that we clear program records when requested */ + @Test + public void testRemoveProgramRecords() { + logger.warn("Executing testRemoveProgramRecords"); + + // setup the data + readers.add(new SAMReaderID(new File(b37GoodBAM),new Tags())); + + // use defaults + SAMDataSource data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false); + + List defaultProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(defaultProgramRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor"); + + boolean removeProgramRecords = false; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); + + removeProgramRecords = true; + data = new SAMDataSource(readers, + new ThreadAllocation(), + null, + genomeLocParser, + false, + SAMFileReader.ValidationStringency.SILENT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + BAQ.CalculationMode.OFF, + BAQ.QualityMode.DONT_MODIFY, + null, // no BAQ + null, // no BQSR + (byte) -1, + removeProgramRecords); + + List doRemoveProgramRecords = data.getHeader().getProgramRecords(); + assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); + } }