From edaa278eddcb1dcc408205e9735ce5a36f72035d Mon Sep 17 00:00:00 2001 From: kshakir Date: Sat, 25 Sep 2010 02:49:30 +0000 Subject: [PATCH] Removed cases where various toolkit functions were accessing GenomeAnalysisEngine.instance. This will allow other programs like Queue to reuse the functionality. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4351 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/AbstractGenomeAnalysisEngine.java | 54 ++++----- .../sting/gatk/CommandLineExecutable.java | 2 + .../sting/gatk/GenomeAnalysisEngine.java | 23 +--- .../sting/gatk/ReadProperties.java | 31 +++-- .../datasources/sample/SampleDataSource.java | 9 +- .../simpleDataSources/SAMDataSource.java | 107 +++++++++++++++--- .../sting/gatk/executive/MicroScheduler.java | 4 +- .../gatk/filters/MalformedReadFilter.java | 11 +- .../gatk/filters/SamRecordHeaderFilter.java | 15 +++ .../sting/gatk/io/OutputTracker.java | 6 +- .../gatk/iterators/LocusIteratorByState.java | 9 +- .../sting/gatk/iterators/NullSAMIterator.java | 4 +- .../iterators/ReadFormattingIterator.java | 11 +- .../gatk/traversals/TraversalEngine.java | 12 +- .../sting/gatk/walkers/Walker.java | 16 ++- .../gatk/walkers/indels/IndelRealigner.java | 3 +- .../walkers/HLAcaller/HLACallerWalker.java | 2 - .../validation/RodSystemValidationWalker.java | 2 +- .../sting/utils/GenomeLocParser.java | 7 +- .../sting/utils/SequenceDictionaryUtils.java | 17 +-- .../sting/utils/interval/IntervalUtils.java | 5 +- .../sting/utils/sam/GATKSAMRecord.java | 5 +- .../providers/LocusViewTemplate.java | 2 +- .../datasources/shards/MockLocusShard.java | 3 +- .../SAMBAMDataSourceUnitTest.java | 9 +- .../LocusIteratorByStateUnitTest.java | 28 +++-- .../traversals/TraverseReadsUnitTest.java | 2 +- 27 files changed, 262 insertions(+), 137 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/gatk/filters/SamRecordHeaderFilter.java diff --git 
a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java index 6351b6c68..4a39678e8 100755 --- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java @@ -30,9 +30,6 @@ import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.gatk.DownsamplingMethod; -import org.broadinstitute.sting.gatk.ReadMetrics; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.sample.Sample; @@ -43,6 +40,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; +import org.broadinstitute.sting.gatk.filters.SamRecordHeaderFilter; import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; @@ -203,7 +201,8 @@ public abstract class AbstractGenomeAnalysisEngine { List additionalIntervals) { return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals, - IntervalUtils.parseIntervalArguments(argList), + IntervalUtils.parseIntervalArguments(argList, + this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), argCollection.BTIMergeRule), mergingRule); } @@ -278,7 +277,7 @@ public abstract class AbstractGenomeAnalysisEngine { * the caller must handle 
that directly. * @return A collection of available filters. */ - protected Collection createFilters() { + public Collection createFilters() { Set filters = new HashSet(); if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0) filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList)); @@ -291,8 +290,12 @@ public abstract class AbstractGenomeAnalysisEngine { logger.info("Strictness is " + argCollection.strictnessLevel); validateSuppliedReads(); - readsDataSource = createReadsDataSource(extractSourceInfo()); + readsDataSource = createReadsDataSource(); + for (SamRecordFilter filter : filters) + if (filter instanceof SamRecordHeaderFilter) + ((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader()); + validateSuppliedReference(); referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); @@ -456,25 +459,6 @@ public abstract class AbstractGenomeAnalysisEngine { } - /** - * Bundles all the source information about the reads into a unified data structure. - * - * @return The reads object providing reads source info. 
- */ - private ReadProperties extractSourceInfo() { - - DownsamplingMethod method = getDownsamplingMethod(); - - return new ReadProperties(unpackBAMFileList(argCollection.samFiles), - argCollection.strictnessLevel, - argCollection.readBufferSize, - method, - new ValidationExclusion(Arrays.asList(argCollection.unsafe)), - filters, - includeReadsWithDeletionAtLoci(), - generateExtendedEvents()); - } - protected DownsamplingMethod getDownsamplingMethod() { DownsamplingMethod method; if(argCollection.getDownsamplingMethod() != null) @@ -522,7 +506,7 @@ public abstract class AbstractGenomeAnalysisEngine { } // compare the reads to the reference - SequenceDictionaryUtils.validateDictionaries(logger, "reads", readsDictionary, "reference", referenceDictionary); + SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary); } // compare the tracks to the reference, if they have a sequence dictionary @@ -538,7 +522,7 @@ public abstract class AbstractGenomeAnalysisEngine { Set trackSequences = new TreeSet(); for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences()) trackSequences.add(dictionaryEntry.getSequenceName()); - SequenceDictionaryUtils.validateDictionaries(logger, track.getName(), trackDict, "reference", referenceDictionary); + SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary); } } @@ -559,11 +543,21 @@ public abstract class AbstractGenomeAnalysisEngine { /** * Gets a data source for the given set of reads. * - * @param reads the read source information * @return A data source for the given set of reads. 
*/ - private SAMDataSource createReadsDataSource(ReadProperties reads) { - return new SAMDataSource(reads); + private SAMDataSource createReadsDataSource() { + DownsamplingMethod method = getDownsamplingMethod(); + + return new SAMDataSource( + unpackBAMFileList(argCollection.samFiles), + argCollection.useOriginalBaseQualities, + argCollection.strictnessLevel, + argCollection.readBufferSize, + method, + new ValidationExclusion(Arrays.asList(argCollection.unsafe)), + filters, + includeReadsWithDeletionAtLoci(), + generateExtendedEvents()); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 63542314b..3a26d970a 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -76,6 +76,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { try { engine.setArguments(getArgumentCollection()); engine.setWalker(walker); + walker.setToolkit(engine); Collection filters = engine.createFilters(); engine.setFilters(filters); @@ -165,6 +166,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { Walker walker = engine.getWalkerByName(getAnalysisName()); engine.setArguments(getArgumentCollection()); engine.setWalker(walker); + walker.setToolkit(engine); argumentSources.add(walker.getClass()); Collection filters = engine.createFilters(); diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 612044805..0710fd9d8 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -56,30 +56,13 @@ import java.util.*; * A GenomeAnalysisEngine that runs a specified walker. 
*/ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { - - // our instance of this genome analysis toolkit; it's used by other classes to extract the traversal engine - // TODO: public static without final tends to indicate we're thinking about this the wrong way - public static GenomeAnalysisEngine instance; - /** * our walker manager */ - private final WalkerManager walkerManager = new WalkerManager();; - + private final WalkerManager walkerManager = new WalkerManager(); private Walker walker; - /** - * our constructor, where all the work is done - *

- * legacy traversal types are sent to legacyTraversal function; as we move more of the traversals to the - * new MicroScheduler class we'll be able to delete that function. - */ - public GenomeAnalysisEngine() { - // make sure our instance variable points to this analysis engine - instance = this; - } - public void setWalker(Walker walker) { this.walker = walker; } @@ -150,7 +133,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { * @return A collection of available filters. */ @Override - protected Collection createFilters() { + public Collection createFilters() { Set filters = new HashSet(); filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager())); filters.addAll(super.createFilters()); @@ -383,6 +366,6 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { for (Stub stub : getOutputs()) outputTracker.addOutput(stub); - outputTracker.prepareWalker(walker); + outputTracker.prepareWalker(walker, getArguments().strictnessLevel); } } diff --git a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 8e9ec7b82..271d6696c 100755 --- a/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; -import java.util.ArrayList; import java.util.List; import java.util.Collection; /** @@ -27,12 +27,14 @@ import java.util.Collection; */ public class ReadProperties { private List readers = null; + private SAMFileHeader header = null; private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private Integer 
readBufferSize = null; private DownsamplingMethod downsamplingMethod = null; private ValidationExclusion exclusionList = null; private Collection supplementalFilters = null; private boolean includeReadsWithDeletionAtLoci = false; + private boolean useOriginalBaseQualities = false; private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels) // immediately after the reference base such event is associated with? @@ -66,6 +68,14 @@ public class ReadProperties { return readers; } + /** + * Gets the sam file header + * @return the sam file header + */ + public SAMFileHeader getHeader() { + return header; + } + /** * How strict should validation be? * @return Stringency of validation. @@ -103,14 +113,11 @@ public class ReadProperties { } /** - * Simple constructor for unit testing. - * @param readsFiles List of reads files to open. + * Return whether to use original base qualities. + * @return Whether to use original base qualities. */ - public ReadProperties( List readsFiles ) { - this.readers = readsFiles; - this.downsamplingMethod = DownsamplingMethod.NONE; - this.supplementalFilters = new ArrayList(); - this.exclusionList = new ValidationExclusion(); + public boolean useOriginalBaseQualities() { + return useOriginalBaseQualities; } /** @@ -118,6 +125,8 @@ public class ReadProperties { * files and store them in an easy-to-work-with package. Constructor * is package protected. * @param samFiles list of reads files. + * @param header sam file header. + * @param useOriginalBaseQualities True if original base qualities should be used. * @param strictness Stringency of reads file parsing. * @param readBufferSize Number of reads to hold in memory per BAM. * @param downsamplingMethod Method for downsampling reads at a given locus. @@ -131,6 +140,8 @@ public class ReadProperties { * bases will be seen in the pileups, and the deletions will be skipped silently. 
*/ public ReadProperties( List samFiles, + SAMFileHeader header, + boolean useOriginalBaseQualities, SAMFileReader.ValidationStringency strictness, Integer readBufferSize, DownsamplingMethod downsamplingMethod, @@ -139,12 +150,14 @@ public class ReadProperties { boolean includeReadsWithDeletionAtLoci, boolean generateExtendedEvents) { this.readers = samFiles; + this.header = header; this.readBufferSize = readBufferSize; this.validationStringency = strictness; - this.downsamplingMethod = downsamplingMethod; + this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod; this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList; this.supplementalFilters = supplementalFilters; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.generateExtendedEvents = generateExtendedEvents; + this.useOriginalBaseQualities = useOriginalBaseQualities; } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java index 55f4e2fd5..f9f5ad4f1 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/sample/SampleDataSource.java @@ -37,6 +37,11 @@ import java.util.Collection; */ public class SampleDataSource { + /** + * SAMFileHeader that has been created for this analysis. + */ + private final SAMFileHeader header; + /** * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so * this is stored as a HashMap. 
@@ -63,7 +68,7 @@ public class SampleDataSource { * @param sampleFiles Sample files that were included on the command line */ public SampleDataSource(SAMFileHeader header, List sampleFiles) { - + this.header = header; // create empty sample object for each sample referenced in the SAM header for (String sampleName : SampleUtils.getSAMFileSamples(header)) { if (!hasSample(sampleName)) { @@ -85,7 +90,7 @@ public class SampleDataSource { * Hallucinates sample objects for all the samples in the SAM file and stores them */ private void getSamplesFromSAMFile() { - for (String sampleName : SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader())) { + for (String sampleName : SampleUtils.getSAMFileSamples(header)) { if (!hasSample(sampleName)) { Sample newSample = new Sample(sampleName); newSample.setSAMFileEntry(true); diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 39c378124..64b2340de 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -32,6 +32,7 @@ import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.picard.sam.MergingSamRecordIterator; import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; @@ -68,6 +69,11 @@ public class SAMDataSource implements SimpleDataSource { */ protected final List readerIDs; + /** + * How strict are the readers driving this data source. + */ + protected final SAMFileReader.ValidationStringency validationStringency; + /** * How far along is each reader? 
*/ @@ -104,14 +110,54 @@ public class SAMDataSource implements SimpleDataSource { /** * Create a new SAM data source given the supplied read metadata. - * @param reads The read metadata. + * @param samFiles list of reads files. */ - public SAMDataSource(ReadProperties reads) { - this.readProperties = reads; + public SAMDataSource(List samFiles) { + this( + samFiles, + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + false + ); + } + + /** + * Create a new SAM data source given the supplied read metadata. + * @param samFiles list of reads files. + * @param useOriginalBaseQualities True if original base qualities should be used. + * @param strictness Stringency of reads file parsing. + * @param readBufferSize Number of reads to hold in memory per BAM. + * @param downsamplingMethod Method for downsampling reads at a given locus. + * @param exclusionList what safety checks we're willing to let slide + * @param supplementalFilters additional filters to dynamically apply. + * @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with + * a pile of indel/noevent extended events at every locus with at least one indel associated with it + * (in addition to a "regular" call to map() at this locus performed with base pileup) + * @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method + * will explicitly list reads with deletion over the current reference base; otherwise, only observed + * bases will be seen in the pileups, and the deletions will be skipped silently. 
+ */ + public SAMDataSource( + List samFiles, + boolean useOriginalBaseQualities, + SAMFileReader.ValidationStringency strictness, + Integer readBufferSize, + DownsamplingMethod downsamplingMethod, + ValidationExclusion exclusionList, + Collection supplementalFilters, + boolean includeReadsWithDeletionAtLoci, + boolean generateExtendedEvents + ) { this.readMetrics = new ReadMetrics(); - readerIDs = reads.getSAMReaderIDs(); - for (SAMReaderID readerID : reads.getSAMReaderIDs()) { + readerIDs = samFiles; + validationStringency = strictness; + for (SAMReaderID readerID : samFiles) { if (!readerID.samFile.canRead()) throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " + "Please check that the file is present and readable and try again."); @@ -136,10 +182,23 @@ public class SAMDataSource implements SimpleDataSource { initializeReaderPositions(readers); - SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true); + SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); mergedHeader = headerMerger.getMergedHeader(); hasReadGroupCollisions = headerMerger.hasReadGroupCollisions(); + readProperties = new ReadProperties( + samFiles, + mergedHeader, + useOriginalBaseQualities, + strictness, + readBufferSize, + downsamplingMethod, + exclusionList, + supplementalFilters, + includeReadsWithDeletionAtLoci, + generateExtendedEvents + ); + // cache the read group id (original) -> read group id (merged) mapping. for(SAMReaderID id: readerIDs) { SAMFileReader reader = readers.getReader(id); @@ -371,10 +430,10 @@ public class SAMDataSource implements SimpleDataSource { * @return An iterator over the selected data. 
*/ private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) { - SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true); + SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); // Set up merging to dynamically merge together multiple BAMs. - MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,true); + MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true); for(SAMReaderID id: getReaderIDs()) { if(shard.getFileSpans().get(id) == null) continue; @@ -388,6 +447,7 @@ public class SAMDataSource implements SimpleDataSource { return applyDecoratingIterators(shard.getReadMetrics(), enableVerification, + readProperties.useOriginalBaseQualities(), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), readProperties.getDownsamplingMethod().toFraction, readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), @@ -403,13 +463,14 @@ public class SAMDataSource implements SimpleDataSource { SAMReaders readers = resourcePool.getAvailableReaders(); // Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set. 
- SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true); - MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,true); + SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true); + MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true); for(SAMReaderID id: getReaderIDs()) mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator()); return applyDecoratingIterators(shard.getReadMetrics(), shard instanceof ReadShard, + readProperties.useOriginalBaseQualities(), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), readProperties.getDownsamplingMethod().toFraction, readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), @@ -433,6 +494,7 @@ public class SAMDataSource implements SimpleDataSource { * * @param readMetrics metrics to track when using this iterator. * @param enableVerification Verify the order of reads. + * @param useOriginalBaseQualities True if original base qualities should be used. * @param wrappedIterator the raw data source. * @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus). * @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this. 
@@ -441,11 +503,12 @@ public class SAMDataSource implements SimpleDataSource { */ protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics, boolean enableVerification, + boolean useOriginalBaseQualities, StingSAMIterator wrappedIterator, Double downsamplingFraction, Boolean noValidationOfReadOrder, Collection supplementalFilters) { - wrappedIterator = new ReadFormattingIterator(wrappedIterator); + wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities); // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away @@ -530,7 +593,7 @@ public class SAMDataSource implements SimpleDataSource { private synchronized void createNewResource() { if(allResources.size() > maxEntries) throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); - SAMReaders readers = new SAMReaders(readProperties); + SAMReaders readers = new SAMReaders(readerIDs, validationStringency); allResources.add(readers); availableResources.add(readers); } @@ -548,14 +611,15 @@ public class SAMDataSource implements SimpleDataSource { /** * Derive a new set of readers from the Reads metadata. - * @param sourceInfo Metadata for the reads to load. + * @param readerIDs reads to load. + * @param validationStringency validation stringency. */ - public SAMReaders(ReadProperties sourceInfo) { - for(SAMReaderID readerID: sourceInfo.getSAMReaderIDs()) { + public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { + for(SAMReaderID readerID: readerIDs) { SAMFileReader reader = new SAMFileReader(readerID.samFile); reader.enableFileSource(true); reader.enableIndexCaching(true); - reader.setValidationStringency(sourceInfo.getValidationStringency()); + reader.setValidationStringency(validationStringency); // If no read group is present, hallucinate one. 
// TODO: Straw poll to see whether this is really required. @@ -614,6 +678,17 @@ public class SAMDataSource implements SimpleDataSource { public Collection values() { return readers.values(); } + + /** + * Gets all the actual readers out of this data structure. + * @return A collection of the readers. + */ + public Collection headers() { + ArrayList headers = new ArrayList(readers.size()); + for (SAMFileReader reader : values()) + headers.add(reader.getFileHeader()); + return headers; + } } private class ReleasingIterator implements StingSAMIterator { diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 10f094010..74c5ff6f6 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -122,7 +122,7 @@ public abstract class MicroScheduler { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); } - traversalEngine.initialize(); + traversalEngine.initialize(engine); } /** @@ -147,7 +147,7 @@ public abstract class MicroScheduler { * @return an iterator over the reads specified in the shard. */ protected StingSAMIterator getReadIterator(Shard shard) { - return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(Collections.emptyList())); + return (!reads.isEmpty()) ? 
reads.seek(shard) : new NullSAMIterator(); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index b48a5cd87..ca6387d8d 100644 --- a/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -36,11 +36,18 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; * @author mhanna * @version 0.1 */ -public class MalformedReadFilter implements SamRecordFilter { +public class MalformedReadFilter implements SamRecordHeaderFilter { + private SAMFileHeader header; + + @Override + public void setHeader(SAMFileHeader header) { + this.header = header; + } + public boolean filterOut(SAMRecord read) { return !checkInvalidAlignmentStart(read) || !checkInvalidAlignmentEnd(read) || - !checkAlignmentDisagreesWithHeader(GenomeAnalysisEngine.instance.getSAMFileHeader(),read) || + !checkAlignmentDisagreesWithHeader(this.header,read) || !checkCigarDisagreesWithAlignment(read); } diff --git a/java/src/org/broadinstitute/sting/gatk/filters/SamRecordHeaderFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/SamRecordHeaderFilter.java new file mode 100644 index 000000000..548115b58 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/SamRecordHeaderFilter.java @@ -0,0 +1,15 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMFileHeader; + +/** + * A SamRecordFilter that also depends on the header. + */ +public interface SamRecordHeaderFilter extends SamRecordFilter { + /** + * Sets the header for use by this filter. + * @param header the header for use by this filter. 
+ */ + void setHeader(SAMFileHeader header); +} diff --git a/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java b/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java index 5acf579e7..b68013aa4 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/io/OutputTracker.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.gatk.io; +import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.storage.StorageFactory; @@ -74,7 +74,7 @@ public abstract class OutputTracker { */ public abstract T getStorage( Stub stub ); - public void prepareWalker( Walker walker ) { + public void prepareWalker( Walker walker, SAMFileReader.ValidationStringency strictnessLevel ) { for( Map.Entry io: inputs.entrySet() ) { ArgumentSource targetField = io.getKey(); Object targetValue = io.getValue(); @@ -83,7 +83,7 @@ public abstract class OutputTracker { // TODO: Generalize this, and move it to its own initialization step. 
if( targetValue instanceof SAMFileReaderBuilder) { SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue; - builder.setValidationStringency(GenomeAnalysisEngine.instance.getArguments().strictnessLevel); + builder.setValidationStringency(strictnessLevel); targetValue = builder.build(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index e0e7b040b..104bbb583 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -273,16 +273,13 @@ public class LocusIteratorByState extends LocusIterator { } public LocusIteratorByState(final Iterator samIterator, ReadProperties readInformation, List filters ) { + this.readInfo = readInformation; + this.filters = filters; // Aggregate all sample names. - // TODO: Push in header via constructor - if(GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getDataSource() != null) { - sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader())); - } + sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader())); // Add a null sample name as a catch-all for reads without samples if(!sampleNames.contains(null)) sampleNames.add(null); readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleNames); - this.readInfo = readInformation; - this.filters = filters; } public Iterator iterator() { diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java index ac91af18f..ff458467f 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/NullSAMIterator.java @@ -22,9 +22,7 @@ import java.util.NoSuchElementException; * A placeholder for 
an iterator with no data. */ public class NullSAMIterator implements StingSAMIterator { - private ReadProperties reads = null; - - public NullSAMIterator( ReadProperties reads ) { this.reads = reads; } + public NullSAMIterator() {} public Iterator iterator() { return this; } public void close() { /* NO-OP */ } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index 4f7d45655..bbe95096e 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -27,12 +27,19 @@ public class ReadFormattingIterator implements StingSAMIterator { */ private StingSAMIterator wrappedIterator; + /** + * True if original base qualities should be used. + */ + private final boolean useOriginalBaseQualities; + /** * Decorate the given iterator inside a ReadWrappingIterator. * @param wrappedIterator iterator + * @param useOriginalBaseQualities true if original base qualities should be used */ - public ReadFormattingIterator(StingSAMIterator wrappedIterator) { + public ReadFormattingIterator(StingSAMIterator wrappedIterator, boolean useOriginalBaseQualities) { this.wrappedIterator = wrappedIterator; + this.useOriginalBaseQualities = useOriginalBaseQualities; } /** @@ -67,7 +74,7 @@ public class ReadFormattingIterator implements StingSAMIterator { * no next exists. 
*/ public SAMRecord next() { - return new GATKSAMRecord(wrappedIterator.next()); + return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 798238b94..6ccfe1396 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -24,6 +24,8 @@ public abstract class TraversalEngine,Provide /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); + private GenomeAnalysisEngine engine; + /** * Gets the named traversal type associated with the given traversal. * @return A user-friendly name for the given traversal type. @@ -48,7 +50,7 @@ public abstract class TraversalEngine,Provide public void printProgress(Shard shard,GenomeLoc loc) { // A bypass is inserted here for unit testing. // TODO: print metrics outside of the traversal engine to more easily handle cumulative stats. - ReadMetrics cumulativeMetrics = GenomeAnalysisEngine.instance != null ? GenomeAnalysisEngine.instance.getCumulativeMetrics().clone() : new ReadMetrics(); + ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics(); cumulativeMetrics.incrementMetrics(shard.getReadMetrics()); printProgress(loc, cumulativeMetrics, false); } @@ -103,8 +105,12 @@ public abstract class TraversalEngine,Provide } } - /** Initialize the traversal engine. After this point traversals can be run over the data */ - public void initialize() { + /** + * Initialize the traversal engine. 
After this point traversals can be run over the data + * @param engine GenomeAnalysisEngine for this traversal + */ + public void initialize(GenomeAnalysisEngine engine) { + this.engine = engine; lastProgressPrintTime = startTime = System.currentTimeMillis(); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 64ede866d..04b5b5e77 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -43,10 +43,20 @@ import org.apache.log4j.Logger; @ReadFilters(MalformedReadFilter.class) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); + private GenomeAnalysisEngine toolkit; protected Walker() { } + /** + * Set the toolkit, for peering into internal structures that can't + * otherwise be read. + * @param toolkit The genome analysis toolkit. + */ + public void setToolkit(GenomeAnalysisEngine toolkit) { + this.toolkit = toolkit; + } + /** * Retrieve the toolkit, for peering into internal structures that can't * otherwise be read. Use sparingly, and discuss uses with software engineering @@ -54,7 +64,7 @@ public abstract class Walker { * @return The genome analysis toolkit. */ protected GenomeAnalysisEngine getToolkit() { - return GenomeAnalysisEngine.instance; + return toolkit; } /** @@ -126,7 +136,7 @@ public abstract class Walker { /** * General interval reduce routine called after all of the traversals are done - * @param results + * @param results interval reduce results */ public void onTraversalDone(List> results) { for ( Pair result : results ) { @@ -145,6 +155,8 @@ public abstract class Walker { * However, onTraversalDone(reduce) will be called after each interval is processed. 
* The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done, * which is overloaded here to call onTraversalDone(reduce) for each location + * + * @return true if your walker wants to reduce each interval separately. */ public boolean isReduceByInterval() { return false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index d785bf595..03e2c8fb6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -31,6 +31,7 @@ import net.sf.samtools.util.SequenceUtil; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; @@ -179,7 +180,7 @@ public class IndelRealigner extends ReadWalker { intervals = new IntervalFileMergingIterator( new java.io.File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); } else { // read in the whole list of intervals for cleaning - GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY); + GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY); intervals = locs.iterator(); } currentInterval = intervals.hasNext() ? 
intervals.next() : null; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerWalker.java index 64c5b3d9d..4ff43be87 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerWalker.java @@ -83,8 +83,6 @@ public class HLACallerWalker extends ReadWalker { @Argument(fullName = "turnOffVerboseOutput", shortName = "noVerbose", doc = "Do not output verbose probability descriptions (INFO lines) ", required = false) protected boolean NO_VERBOSE = false; - GATKArgumentCollection args = this.getToolkit().getArguments(); - // Initializing variables HLAFileReader HLADictionaryReader = new HLAFileReader(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java index 7e2e7b794..85d687e48 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java @@ -53,7 +53,7 @@ public class RodSystemValidationWalker extends RodWalker { } out.println("Header:"); // enumerate the list of ROD's we've loaded - rodList = GenomeAnalysisEngine.instance.getRodDataSources(); + rodList = this.getToolkit().getRodDataSources(); for (ReferenceOrderedDataSource rod : rodList) { out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType()); out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile()); diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 832616731..6336e113f 100644 --- 
a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -323,16 +323,17 @@ public class GenomeLocParser { * Where each locN can be: * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' * - * @param file_name + * @param file_name interval file + * @param allowEmptyIntervalList if false empty interval lists will return null * @return List List of Genome Locs that have been parsed from file */ - public static List intervalFileToList(final String file_name) { + public static List intervalFileToList(final String file_name, boolean allowEmptyIntervalList) { // try to open file File inputFile = new File(file_name); // check if file is empty if (inputFile.exists() && inputFile.length() < 1) { - if (GenomeAnalysisEngine.instance.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST) + if (allowEmptyIntervalList) return new ArrayList(); else { Utils.warnUser("The interval file " + file_name + " is empty. 
The GATK will continue processing but you " + diff --git a/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java b/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java index 344a60963..0a1b088e7 100755 --- a/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java +++ b/java/src/org/broadinstitute/sting/utils/SequenceDictionaryUtils.java @@ -76,12 +76,12 @@ public class SequenceDictionaryUtils { } /** + * @param validationExclusion exclusions to validation * @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency */ - public static boolean allowNonFatalIncompabilities() { - return GenomeAnalysisEngine.instance != null && - ( GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY || - GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALL ); + public static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) { + return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY || + validationExclusion == ValidationExclusion.TYPE.ALL ); } /** @@ -89,12 +89,13 @@ public class SequenceDictionaryUtils { * thrown with detailed error messages. 
If the engine is in permissive mode, then logger.warnings of generated instead * * @param logger for warnings + * @param validationExclusion exclusions to validation * @param name1 name associated with dict1 * @param dict1 the sequence dictionary dict1 * @param name2 name associated with dict2 * @param dict2 the sequence dictionary dict2 */ - public static void validateDictionaries(Logger logger, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) { + public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) { SequenceDictionaryCompatability type = compareDictionaries(dict1, dict2); switch ( type ) { case IDENTICAL: @@ -115,7 +116,7 @@ public class SequenceDictionaryUtils { name2, elt2.getSequenceName(), elt2.getSequenceLength()), name1, dict1, name2, dict2); - if ( allowNonFatalIncompabilities() ) + if ( allowNonFatalIncompabilities(validationExclusion) ) logger.warn(ex.getMessage()); else throw ex; @@ -129,7 +130,7 @@ public class SequenceDictionaryUtils { else ex = new UserException.LexicographicallySortedSequenceDictionary(name2, dict2); - if ( allowNonFatalIncompabilities() ) + if ( allowNonFatalIncompabilities(validationExclusion) ) logger.warn(ex.getMessage()); else throw ex; @@ -137,7 +138,7 @@ public class SequenceDictionaryUtils { case OUT_OF_ORDER: { UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2); - if ( allowNonFatalIncompabilities() ) + if ( allowNonFatalIncompabilities(validationExclusion) ) logger.warn(ex.getMessage()); else throw ex; diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 16494b56d..9f68ac87a 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ 
b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -26,9 +26,10 @@ public class IntervalUtils { * specifications. * * @param argList A list of strings containing interval data. + * @param allowEmptyIntervalList If false, null is returned instead of an empty interval list. * @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used. */ - public static List parseIntervalArguments(List argList) { + public static List parseIntervalArguments(List argList, boolean allowEmptyIntervalList) { List rawIntervals = new ArrayList(); // running list of raw GenomeLocs if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to @@ -50,7 +51,7 @@ public class IntervalUtils { // if it's a file, add items to raw interval list if (isIntervalFile(fileOrInterval)) { try { - rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval)); + rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList)); } catch (Exception e) { throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e); diff --git a/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index fc58ea7af..9e3b2609f 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -39,7 +39,7 @@ public class GATKSAMRecord extends SAMRecord { // These attributes exist in memory only, and are never written to disk.
private Map temporaryAttributes; - public GATKSAMRecord(SAMRecord record) { + public GATKSAMRecord(SAMRecord record, boolean useOriginalBaseQualities) { super(null); // it doesn't matter - this isn't used if ( record == null ) throw new IllegalArgumentException("The SAMRecord argument cannot be null"); @@ -55,8 +55,7 @@ public class GATKSAMRecord extends SAMRecord { setAttribute(attribute.tag, attribute.value); // if we are using original quals, set them now if t hey are present in the record - if ( GenomeAnalysisEngine.instance != null && - GenomeAnalysisEngine.instance.getArguments().useOriginalBaseQualities ) { + if ( useOriginalBaseQualities ) { byte[] originalQuals = mRecord.getOriginalBaseQualities(); if ( originalQuals != null ) mRecord.setBaseQualities(originalQuals); diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index bbab06168..260217566 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -50,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest { SAMRecordIterator iterator = new SAMRecordIterator(); GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); - Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.emptyList())),Collections.singletonList(shardBounds),Collections.emptyMap()); + Shard shard = new LocusShard(new SAMDataSource(Collections.emptyList()),Collections.singletonList(shardBounds),Collections.emptyMap()); WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker.WindowMakerIterator window = windowMaker.next(); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); diff --git 
a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java index ce466a995..f5e90a8be 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/shards/MockLocusShard.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.datasources.shards; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; @@ -40,6 +39,6 @@ import java.util.Collections; */ public class MockLocusShard extends LocusShard { public MockLocusShard(final List intervals) { - super(new SAMDataSource(new ReadProperties(Collections.emptyList())),intervals,null); + super(new SAMDataSource(Collections.emptyList()),intervals,null); } } diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java index 9d0e534c9..cd520c1aa 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMBAMDataSourceUnitTest.java @@ -86,10 +86,9 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // setup the data readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); - ReadProperties reads = new ReadProperties(readers); // the sharding strat. 
- SAMDataSource data = new SAMDataSource(reads); + SAMDataSource data = new SAMDataSource(readers); ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); int count = 0; @@ -131,10 +130,9 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { // setup the test files readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); - ReadProperties reads = new ReadProperties(readers); // the sharding strat. - SAMDataSource data = new SAMDataSource(reads); + SAMDataSource data = new SAMDataSource(readers); ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); ArrayList readcountPerShard = new ArrayList(); @@ -173,11 +171,10 @@ public class SAMBAMDataSourceUnitTest extends BaseTest { readers.clear(); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.emptyList())); - reads = new ReadProperties(readers); count = 0; // the sharding strat. 
- data = new SAMDataSource(reads); + data = new SAMDataSource(readers); strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); logger.debug("Pile two:"); diff --git a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java index 7e1c78c78..723d40207 100644 --- a/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java @@ -1,11 +1,14 @@ package org.broadinstitute.sting.gatk.iterators; import junit.framework.Assert; +import net.sf.picard.filter.SamRecordFilter; import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.CloseableIterator; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.ReadProperties; +import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -15,11 +18,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.junit.BeforeClass; import org.junit.Test; -import java.io.File; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Arrays; +import java.util.*; /** * testing of the LocusIteratorByState @@ -41,7 +40,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; // create a test version of the Reads object - ReadProperties readAttributes = new ReadProperties(new ArrayList()); + ReadProperties readAttributes = createTestReadProperties(); 
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); @@ -93,7 +92,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest { final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; // create a test version of the Reads object - ReadProperties readAttributes = new ReadProperties(new ArrayList()); + ReadProperties readAttributes = createTestReadProperties(); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); @@ -132,6 +131,21 @@ public class LocusIteratorByStateUnitTest extends BaseTest { Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup); } + + private static ReadProperties createTestReadProperties() { + return new ReadProperties( + Collections.emptyList(), + new SAMFileHeader(), + false, + SAMFileReader.ValidationStringency.STRICT, + null, + null, + new ValidationExclusion(), + new ArrayList(), + false, + false + ); + } } class FakeCloseableIterator implements CloseableIterator { diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java index 782cd296d..df8ec77f4 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java @@ -99,7 +99,7 @@ public class TraverseReadsUnitTest extends BaseTest { ref = new IndexedFastaSequenceFile(refFile); GenomeLocParser.setupRefContigOrdering(ref); - SAMDataSource dataSource = new SAMDataSource(new ReadProperties(bamList)); + SAMDataSource dataSource = new SAMDataSource(bamList); ShardStrategy shardStrategy = 
ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ref.getSequenceDictionary(), readSize);