Add removeProgramRecords argument

* Add a unit test for the removeProgramRecords argument
This commit is contained in:
Joel Thibault 2012-07-30 10:19:57 -04:00
parent d53105668b
commit 2b25df3d53
4 changed files with 95 additions and 5 deletions

View File

@ -813,7 +813,8 @@ public class GenomeAnalysisEngine {
getWalkerBAQQualityMode(), getWalkerBAQQualityMode(),
refReader, refReader,
getBaseRecalibration(), getBaseRecalibration(),
argCollection.defaultBaseQualities); argCollection.defaultBaseQualities,
argCollection.removeProgramRecords);
} }
/** /**

View File

@ -249,6 +249,9 @@ public class GATKArgumentCollection {
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Argument(fullName = "remove_program_records", shortName = "rpr", doc = "Should we remove program records from the SAM header", required = false)
public boolean removeProgramRecords = false;
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe; public ValidationExclusion.TYPE unsafe;

View File

@ -89,6 +89,11 @@ public class SAMDataSource {
*/ */
private final SAMFileReader.ValidationStringency validationStringency; private final SAMFileReader.ValidationStringency validationStringency;
/**
* Do we want to remove the program records from this data source?
*/
private final boolean removeProgramRecords;
/** /**
* Store BAM indices for each reader present. * Store BAM indices for each reader present.
*/ */
@ -200,7 +205,8 @@ public class SAMDataSource {
BAQ.QualityMode.DONT_MODIFY, BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ null, // no BAQ
null, // no BQSR null, // no BQSR
(byte) -1); (byte) -1,
false);
} }
/** /**
@ -233,7 +239,8 @@ public class SAMDataSource {
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
BaseRecalibration bqsrApplier, BaseRecalibration bqsrApplier,
byte defaultBaseQualities) { byte defaultBaseQualities,
boolean removeProgramRecords) {
this.readMetrics = new ReadMetrics(); this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
@ -249,6 +256,7 @@ public class SAMDataSource {
dispatcher = null; dispatcher = null;
validationStringency = strictness; validationStringency = strictness;
this.removeProgramRecords = removeProgramRecords;
if(readBufferSize != null) if(readBufferSize != null)
ReadShard.setReadBufferSize(readBufferSize); ReadShard.setReadBufferSize(readBufferSize);
else { else {
@ -748,7 +756,7 @@ public class SAMDataSource {
private synchronized void createNewResource() { private synchronized void createNewResource() {
if(allResources.size() > maxEntries) if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readerIDs, validationStringency); SAMReaders readers = new SAMReaders(readerIDs, validationStringency, removeProgramRecords);
allResources.add(readers); allResources.add(readers);
availableResources.add(readers); availableResources.add(readers);
} }
@ -777,9 +785,11 @@ public class SAMDataSource {
/** /**
* Derive a new set of readers from the Reads metadata. * Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load. * @param readerIDs reads to load.
* TODO: validationStringency is not used here
* @param validationStringency validation stringency. * @param validationStringency validation stringency.
* @param removeProgramRecords indicate whether to clear program records from the readers
*/ */
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency, boolean removeProgramRecords) {
final int totalNumberOfFiles = readerIDs.size(); final int totalNumberOfFiles = readerIDs.size();
int readerNumber = 1; int readerNumber = 1;
final SimpleTimer timer = new SimpleTimer().start(); final SimpleTimer timer = new SimpleTimer().start();
@ -790,6 +800,9 @@ public class SAMDataSource {
long lastTick = timer.currentTime(); long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) { for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call(); final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) { if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
} }

View File

@ -24,9 +24,12 @@
package org.broadinstitute.sting.gatk.datasources.reads; package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail; import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.commandline.Tags;
@ -36,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.AfterMethod; import org.testng.annotations.AfterMethod;
@ -143,4 +147,73 @@ public class SAMDataSourceUnitTest extends BaseTest {
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception"); fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
} }
} }
/**
 * Checks how the removeProgramRecords flag affects the header exposed by a SAMDataSource.
 *
 * Three data sources are opened over the same reader list, in this order:
 * one through the constructor that takes no removeProgramRecords argument,
 * one with the flag explicitly set to false, and one with it set to true.
 * The first must expose at least one program record, the second must expose
 * the same records as the first, and the third must expose none.
 */
@Test
public void testRemoveProgramRecords() {
    logger.warn("Executing testRemoveProgramRecords");

    // register the input BAM with the shared reader list
    readers.add(new SAMReaderID(new File(b37GoodBAM), new Tags()));

    // baseline: constructor that takes no removeProgramRecords argument
    final SAMDataSource baselineSource = new SAMDataSource(readers,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.SILENT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            false);
    final List<SAMProgramRecord> baselineRecords = baselineSource.getHeader().getProgramRecords();
    assertTrue(baselineRecords.size() != 0, "testRemoveProgramRecords: No program records found when using default constructor");

    // flag explicitly off: header must match the baseline
    final List<SAMProgramRecord> keptRecords = openWithProgramRecordFlag(false).getHeader().getProgramRecords();
    assertEquals(keptRecords, baselineRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false");

    // flag on: header must carry no program records
    final List<SAMProgramRecord> strippedRecords = openWithProgramRecordFlag(true).getHeader().getProgramRecords();
    assertTrue(strippedRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true");
}

/**
 * Opens a SAMDataSource over the current reader list through the constructor
 * that accepts an explicit removeProgramRecords argument; every other argument
 * is fixed to the values this test uses.
 *
 * @param removeProgramRecords value forwarded as the constructor's last argument
 * @return the newly constructed data source
 */
private SAMDataSource openWithProgramRecordFlag(final boolean removeProgramRecords) {
    return new SAMDataSource(readers,
            new ThreadAllocation(),
            null,
            genomeLocParser,
            false,
            SAMFileReader.ValidationStringency.SILENT,
            null,
            null,
            new ValidationExclusion(),
            new ArrayList<ReadFilter>(),
            false,
            BAQ.CalculationMode.OFF,
            BAQ.QualityMode.DONT_MODIFY,
            null, // no BAQ
            null, // no BQSR
            (byte) -1,
            removeProgramRecords);
}
} }