Removed cases where various toolkit functions accessed GenomeAnalysisEngine.instance directly.

This allows other programs, such as Queue, to reuse the functionality.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4351 348d0f76-0448-11de-a6fe-93d51630548a
kshakir 2010-09-25 02:49:30 +00:00
parent 497bcbcbb7
commit edaa278edd
27 changed files with 262 additions and 137 deletions
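A minimal sketch of the new wiring, adapted from the CommandLineExecutable and Walker changes below. The helper class and method names here are illustrative only, and the surrounding GATK classes are assumed to be on the classpath; the point is that the walker now receives the engine explicitly via setToolkit() instead of reading the static GenomeAnalysisEngine.instance field.

import java.util.Collection;

import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.Walker;

// Hypothetical helper, for illustration only.
final class EngineWiringSketch {
    static GenomeAnalysisEngine wire(GATKArgumentCollection args, String walkerName) {
        GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        Walker walker = engine.getWalkerByName(walkerName);
        engine.setArguments(args);
        engine.setWalker(walker);
        walker.setToolkit(engine);   // new: the engine is injected instead of read from a static field
        // createFilters() is now public, so callers outside the engine (e.g. Queue) can reuse it
        Collection<SamRecordFilter> filters = engine.createFilters();
        engine.setFilters(filters);
        return engine;
    }
}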

View File

@ -30,9 +30,6 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
@ -43,6 +40,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.filters.SamRecordHeaderFilter;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
@ -203,7 +201,8 @@ public abstract class AbstractGenomeAnalysisEngine {
List<GenomeLoc> additionalIntervals) {
return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals,
IntervalUtils.parseIntervalArguments(argList),
IntervalUtils.parseIntervalArguments(argList,
this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST),
argCollection.BTIMergeRule),
mergingRule);
}
@ -278,7 +277,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* the caller must handle that directly.
* @return A collection of available filters.
*/
protected Collection<SamRecordFilter> createFilters() {
public Collection<SamRecordFilter> createFilters() {
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0)
filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList));
@ -291,8 +290,12 @@ public abstract class AbstractGenomeAnalysisEngine {
logger.info("Strictness is " + argCollection.strictnessLevel);
validateSuppliedReads();
readsDataSource = createReadsDataSource(extractSourceInfo());
readsDataSource = createReadsDataSource();
for (SamRecordFilter filter : filters)
if (filter instanceof SamRecordHeaderFilter)
((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader());
validateSuppliedReference();
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
@ -456,25 +459,6 @@ public abstract class AbstractGenomeAnalysisEngine {
}
/**
* Bundles all the source information about the reads into a unified data structure.
*
* @return The reads object providing reads source info.
*/
private ReadProperties extractSourceInfo() {
DownsamplingMethod method = getDownsamplingMethod();
return new ReadProperties(unpackBAMFileList(argCollection.samFiles),
argCollection.strictnessLevel,
argCollection.readBufferSize,
method,
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
filters,
includeReadsWithDeletionAtLoci(),
generateExtendedEvents());
}
protected DownsamplingMethod getDownsamplingMethod() {
DownsamplingMethod method;
if(argCollection.getDownsamplingMethod() != null)
@ -522,7 +506,7 @@ public abstract class AbstractGenomeAnalysisEngine {
}
// compare the reads to the reference
SequenceDictionaryUtils.validateDictionaries(logger, "reads", readsDictionary, "reference", referenceDictionary);
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary);
}
// compare the tracks to the reference, if they have a sequence dictionary
@ -538,7 +522,7 @@ public abstract class AbstractGenomeAnalysisEngine {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, track.getName(), trackDict, "reference", referenceDictionary);
SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary);
}
}
@ -559,11 +543,21 @@ public abstract class AbstractGenomeAnalysisEngine {
/**
* Gets a data source for the given set of reads.
*
* @param reads the read source information
* @return A data source for the given set of reads.
*/
private SAMDataSource createReadsDataSource(ReadProperties reads) {
return new SAMDataSource(reads);
private SAMDataSource createReadsDataSource() {
DownsamplingMethod method = getDownsamplingMethod();
return new SAMDataSource(
unpackBAMFileList(argCollection.samFiles),
argCollection.useOriginalBaseQualities,
argCollection.strictnessLevel,
argCollection.readBufferSize,
method,
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
filters,
includeReadsWithDeletionAtLoci(),
generateExtendedEvents());
}
/**

View File

@ -76,6 +76,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
try {
engine.setArguments(getArgumentCollection());
engine.setWalker(walker);
walker.setToolkit(engine);
Collection<SamRecordFilter> filters = engine.createFilters();
engine.setFilters(filters);
@ -165,6 +166,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
Walker walker = engine.getWalkerByName(getAnalysisName());
engine.setArguments(getArgumentCollection());
engine.setWalker(walker);
walker.setToolkit(engine);
argumentSources.add(walker.getClass());
Collection<SamRecordFilter> filters = engine.createFilters();

View File

@ -56,30 +56,13 @@ import java.util.*;
* A GenomeAnalysisEngine that runs a specified walker.
*/
public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
// our instance of this genome analysis toolkit; it's used by other classes to extract the traversal engine
// TODO: public static without final tends to indicate we're thinking about this the wrong way
public static GenomeAnalysisEngine instance;
/**
* our walker manager
*/
private final WalkerManager walkerManager = new WalkerManager();;
private final WalkerManager walkerManager = new WalkerManager();
private Walker<?, ?> walker;
/**
* our constructor, where all the work is done
* <p/>
* legacy traversal types are sent to legacyTraversal function; as we move more of the traversals to the
* new MicroScheduler class we'll be able to delete that function.
*/
public GenomeAnalysisEngine() {
// make sure our instance variable points to this analysis engine
instance = this;
}
public void setWalker(Walker<?, ?> walker) {
this.walker = walker;
}
@ -150,7 +133,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
* @return A collection of available filters.
*/
@Override
protected Collection<SamRecordFilter> createFilters() {
public Collection<SamRecordFilter> createFilters() {
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager()));
filters.addAll(super.createFilters());
@ -383,6 +366,6 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
for (Stub<?> stub : getOutputs())
outputTracker.addOutput(stub);
outputTracker.prepareWalker(walker);
outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
}
}

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
/**
@ -27,12 +27,14 @@ import java.util.Collection;
*/
public class ReadProperties {
private List<SAMReaderID> readers = null;
private SAMFileHeader header = null;
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Integer readBufferSize = null;
private DownsamplingMethod downsamplingMethod = null;
private ValidationExclusion exclusionList = null;
private Collection<SamRecordFilter> supplementalFilters = null;
private boolean includeReadsWithDeletionAtLoci = false;
private boolean useOriginalBaseQualities = false;
private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels)
// immediately after the reference base such event is associated with?
@ -66,6 +68,14 @@ public class ReadProperties {
return readers;
}
/**
* Gets the sam file header
* @return the sam file header
*/
public SAMFileHeader getHeader() {
return header;
}
/**
* How strict should validation be?
* @return Stringency of validation.
@ -103,14 +113,11 @@ public class ReadProperties {
}
/**
* Simple constructor for unit testing.
* @param readsFiles List of reads files to open.
* Return whether to use original base qualities.
* @return Whether to use original base qualities.
*/
public ReadProperties( List<SAMReaderID> readsFiles ) {
this.readers = readsFiles;
this.downsamplingMethod = DownsamplingMethod.NONE;
this.supplementalFilters = new ArrayList<SamRecordFilter>();
this.exclusionList = new ValidationExclusion();
public boolean useOriginalBaseQualities() {
return useOriginalBaseQualities;
}
/**
@ -118,6 +125,8 @@ public class ReadProperties {
* files and store them in an easy-to-work-with package. Constructor
* is package protected.
* @param samFiles list of reads files.
* @param header sam file header.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param strictness Stringency of reads file parsing.
* @param readBufferSize Number of reads to hold in memory per BAM.
* @param downsamplingMethod Method for downsampling reads at a given locus.
@ -131,6 +140,8 @@ public class ReadProperties {
* bases will be seen in the pileups, and the deletions will be skipped silently.
*/
public ReadProperties( List<SAMReaderID> samFiles,
SAMFileHeader header,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
Integer readBufferSize,
DownsamplingMethod downsamplingMethod,
@ -139,12 +150,14 @@ public class ReadProperties {
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents) {
this.readers = samFiles;
this.header = header;
this.readBufferSize = readBufferSize;
this.validationStringency = strictness;
this.downsamplingMethod = downsamplingMethod;
this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
this.supplementalFilters = supplementalFilters;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.generateExtendedEvents = generateExtendedEvents;
this.useOriginalBaseQualities = useOriginalBaseQualities;
}
}

View File

@ -37,6 +37,11 @@ import java.util.Collection;
*/
public class SampleDataSource {
/**
* SAMFileHeader that has been created for this analysis.
*/
private final SAMFileHeader header;
/**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap.
@ -63,7 +68,7 @@ public class SampleDataSource {
* @param sampleFiles Sample files that were included on the command line
*/
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
this.header = header;
// create empty sample object for each sample referenced in the SAM header
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (!hasSample(sampleName)) {
@ -85,7 +90,7 @@ public class SampleDataSource {
* Hallucinates sample objects for all the samples in the SAM file and stores them
*/
private void getSamplesFromSAMFile() {
for (String sampleName : SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader())) {
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (!hasSample(sampleName)) {
Sample newSample = new Sample(sampleName);
newSample.setSAMFileEntry(true);

View File

@ -32,6 +32,7 @@ import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.picard.sam.MergingSamRecordIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
@ -68,6 +69,11 @@ public class SAMDataSource implements SimpleDataSource {
*/
protected final List<SAMReaderID> readerIDs;
/**
* How strict are the readers driving this data source.
*/
protected final SAMFileReader.ValidationStringency validationStringency;
/**
* How far along is each reader?
*/
@ -104,14 +110,54 @@ public class SAMDataSource implements SimpleDataSource {
/**
* Create a new SAM data source given the supplied read metadata.
* @param reads The read metadata.
* @param samFiles list of reads files.
*/
public SAMDataSource(ReadProperties reads) {
this.readProperties = reads;
public SAMDataSource(List<SAMReaderID> samFiles) {
this(
samFiles,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<SamRecordFilter>(),
false,
false
);
}
/**
* Create a new SAM data source given the supplied read metadata.
* @param samFiles list of reads files.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param strictness Stringency of reads file parsing.
* @param readBufferSize Number of reads to hold in memory per BAM.
* @param downsamplingMethod Method for downsampling reads at a given locus.
* @param exclusionList what safety checks we're willing to let slide
* @param supplementalFilters additional filters to dynamically apply.
* @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with
* a pile of indel/noevent extended events at every locus with at least one indel associated with it
* (in addition to a "regular" call to map() at this locus performed with base pileup)
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently.
*/
public SAMDataSource(
List<SAMReaderID> samFiles,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
Integer readBufferSize,
DownsamplingMethod downsamplingMethod,
ValidationExclusion exclusionList,
Collection<SamRecordFilter> supplementalFilters,
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents
) {
this.readMetrics = new ReadMetrics();
readerIDs = reads.getSAMReaderIDs();
for (SAMReaderID readerID : reads.getSAMReaderIDs()) {
readerIDs = samFiles;
validationStringency = strictness;
for (SAMReaderID readerID : samFiles) {
if (!readerID.samFile.canRead())
throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " +
"Please check that the file is present and readable and try again.");
@ -136,10 +182,23 @@ public class SAMDataSource implements SimpleDataSource {
initializeReaderPositions(readers);
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
mergedHeader = headerMerger.getMergedHeader();
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
readProperties = new ReadProperties(
samFiles,
mergedHeader,
useOriginalBaseQualities,
strictness,
readBufferSize,
downsamplingMethod,
exclusionList,
supplementalFilters,
includeReadsWithDeletionAtLoci,
generateExtendedEvents
);
// cache the read group id (original) -> read group id (merged) mapping.
for(SAMReaderID id: readerIDs) {
SAMFileReader reader = readers.getReader(id);
@ -371,10 +430,10 @@ public class SAMDataSource implements SimpleDataSource {
* @return An iterator over the selected data.
*/
private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) {
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
// Set up merging to dynamically merge together multiple BAMs.
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,true);
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
for(SAMReaderID id: getReaderIDs()) {
if(shard.getFileSpans().get(id) == null)
continue;
@ -388,6 +447,7 @@ public class SAMDataSource implements SimpleDataSource {
return applyDecoratingIterators(shard.getReadMetrics(),
enableVerification,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
@ -403,13 +463,14 @@ public class SAMDataSource implements SimpleDataSource {
SAMReaders readers = resourcePool.getAvailableReaders();
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true);
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,true);
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
for(SAMReaderID id: getReaderIDs())
mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator());
return applyDecoratingIterators(shard.getReadMetrics(),
shard instanceof ReadShard,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
@ -433,6 +494,7 @@ public class SAMDataSource implements SimpleDataSource {
*
* @param readMetrics metrics to track when using this iterator.
* @param enableVerification Verify the order of reads.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param wrappedIterator the raw data source.
* @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus).
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
@ -441,11 +503,12 @@ public class SAMDataSource implements SimpleDataSource {
*/
protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics,
boolean enableVerification,
boolean useOriginalBaseQualities,
StingSAMIterator wrappedIterator,
Double downsamplingFraction,
Boolean noValidationOfReadOrder,
Collection<SamRecordFilter> supplementalFilters) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator);
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away
@ -530,7 +593,7 @@ public class SAMDataSource implements SimpleDataSource {
private synchronized void createNewResource() {
if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readProperties);
SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
allResources.add(readers);
availableResources.add(readers);
}
@ -548,14 +611,15 @@ public class SAMDataSource implements SimpleDataSource {
/**
* Derive a new set of readers from the Reads metadata.
* @param sourceInfo Metadata for the reads to load.
* @param readerIDs reads to load.
* @param validationStringency validation stringency.
*/
public SAMReaders(ReadProperties sourceInfo) {
for(SAMReaderID readerID: sourceInfo.getSAMReaderIDs()) {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.enableFileSource(true);
reader.enableIndexCaching(true);
reader.setValidationStringency(sourceInfo.getValidationStringency());
reader.setValidationStringency(validationStringency);
// If no read group is present, hallucinate one.
// TODO: Straw poll to see whether this is really required.
@ -614,6 +678,17 @@ public class SAMDataSource implements SimpleDataSource {
public Collection<SAMFileReader> values() {
return readers.values();
}
/**
* Gets the file headers of all readers in this data structure.
* @return A collection of the headers.
*/
public Collection<SAMFileHeader> headers() {
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
for (SAMFileReader reader : values())
headers.add(reader.getFileHeader());
return headers;
}
}
private class ReleasingIterator implements StingSAMIterator {

View File

@ -122,7 +122,7 @@ public abstract class MicroScheduler {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}
traversalEngine.initialize();
traversalEngine.initialize(engine);
}
/**
@ -147,7 +147,7 @@ public abstract class MicroScheduler {
* @return an iterator over the reads specified in the shard.
*/
protected StingSAMIterator getReadIterator(Shard shard) {
return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(Collections.<SAMReaderID>emptyList()));
return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator();
}
/**

View File

@ -36,11 +36,18 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
* @author mhanna
* @version 0.1
*/
public class MalformedReadFilter implements SamRecordFilter {
public class MalformedReadFilter implements SamRecordHeaderFilter {
private SAMFileHeader header;
@Override
public void setHeader(SAMFileHeader header) {
this.header = header;
}
public boolean filterOut(SAMRecord read) {
return !checkInvalidAlignmentStart(read) ||
!checkInvalidAlignmentEnd(read) ||
!checkAlignmentDisagreesWithHeader(GenomeAnalysisEngine.instance.getSAMFileHeader(),read) ||
!checkAlignmentDisagreesWithHeader(this.header,read) ||
!checkCigarDisagreesWithAlignment(read);
}

View File

@ -0,0 +1,15 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
/**
* A SamRecordFilter that also depends on the header.
*/
public interface SamRecordHeaderFilter extends SamRecordFilter {
/**
* Sets the header for use by this filter.
* @param header the header for use by this filter.
*/
void setHeader(SAMFileHeader header);
}
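A usage sketch of the new extension point (the filter class and its logic below are hypothetical, not part of this commit): any filter returned from createFilters() that implements this interface has setHeader() called by the engine with its SAM file header before traversal, as MalformedReadFilter above now relies on.

import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.filters.SamRecordHeaderFilter;

// Hypothetical header-aware filter: drops mapped reads whose contig is absent from the header.
public class UnknownContigFilter implements SamRecordHeaderFilter {
    private SAMFileHeader header;

    public void setHeader(SAMFileHeader header) {
        this.header = header;
    }

    public boolean filterOut(SAMRecord read) {
        return !read.getReadUnmappedFlag()
                && header.getSequence(read.getReferenceName()) == null;
    }
}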

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.gatk.io;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.io.storage.StorageFactory;
@ -74,7 +74,7 @@ public abstract class OutputTracker {
*/
public abstract <T> T getStorage( Stub<T> stub );
public void prepareWalker( Walker walker ) {
public void prepareWalker( Walker walker, SAMFileReader.ValidationStringency strictnessLevel ) {
for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) {
ArgumentSource targetField = io.getKey();
Object targetValue = io.getValue();
@ -83,7 +83,7 @@ public abstract class OutputTracker {
// TODO: Generalize this, and move it to its own initialization step.
if( targetValue instanceof SAMFileReaderBuilder) {
SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue;
builder.setValidationStringency(GenomeAnalysisEngine.instance.getArguments().strictnessLevel);
builder.setValidationStringency(strictnessLevel);
targetValue = builder.build();
}

View File

@ -273,16 +273,13 @@ public class LocusIteratorByState extends LocusIterator {
}
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, List<LocusIteratorFilter> filters ) {
this.readInfo = readInformation;
this.filters = filters;
// Aggregate all sample names.
// TODO: Push in header via constructor
if(GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getDataSource() != null) {
sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader()));
}
sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader()));
// Add a null sample name as a catch-all for reads without samples
if(!sampleNames.contains(null)) sampleNames.add(null);
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleNames);
this.readInfo = readInformation;
this.filters = filters;
}
public Iterator<AlignmentContext> iterator() {

View File

@ -22,9 +22,7 @@ import java.util.NoSuchElementException;
* A placeholder for an iterator with no data.
*/
public class NullSAMIterator implements StingSAMIterator {
private ReadProperties reads = null;
public NullSAMIterator( ReadProperties reads ) { this.reads = reads; }
public NullSAMIterator() {}
public Iterator<SAMRecord> iterator() { return this; }
public void close() { /* NO-OP */ }

View File

@ -27,12 +27,19 @@ public class ReadFormattingIterator implements StingSAMIterator {
*/
private StingSAMIterator wrappedIterator;
/**
* True if original base qualities should be used.
*/
private final boolean useOriginalBaseQualities;
/**
* Decorate the given iterator inside a ReadWrappingIterator.
* @param wrappedIterator iterator
* @param useOriginalBaseQualities true if original base qualities should be used
*/
public ReadFormattingIterator(StingSAMIterator wrappedIterator) {
public ReadFormattingIterator(StingSAMIterator wrappedIterator, boolean useOriginalBaseQualities) {
this.wrappedIterator = wrappedIterator;
this.useOriginalBaseQualities = useOriginalBaseQualities;
}
/**
@ -67,7 +74,7 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists.
*/
public SAMRecord next() {
return new GATKSAMRecord(wrappedIterator.next());
return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities);
}
/**

View File

@ -24,6 +24,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
private GenomeAnalysisEngine engine;
/**
* Gets the named traversal type associated with the given traversal.
* @return A user-friendly name for the given traversal type.
@ -48,7 +50,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
public void printProgress(Shard shard,GenomeLoc loc) {
// A bypass is inserted here for unit testing.
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
ReadMetrics cumulativeMetrics = GenomeAnalysisEngine.instance != null ? GenomeAnalysisEngine.instance.getCumulativeMetrics().clone() : new ReadMetrics();
ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
printProgress(loc, cumulativeMetrics, false);
}
@ -103,8 +105,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
}
}
/** Initialize the traversal engine. After this point traversals can be run over the data */
public void initialize() {
/**
* Initialize the traversal engine. After this point traversals can be run over the data
* @param engine GenomeAnalysisEngine for this traversal
*/
public void initialize(GenomeAnalysisEngine engine) {
this.engine = engine;
lastProgressPrintTime = startTime = System.currentTimeMillis();
}

View File

@ -43,10 +43,20 @@ import org.apache.log4j.Logger;
@ReadFilters(MalformedReadFilter.class)
public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class);
private GenomeAnalysisEngine toolkit;
protected Walker() {
}
/**
* Set the toolkit, for peering into internal structures that can't
* otherwise be read.
* @param toolkit The genome analysis toolkit.
*/
public void setToolkit(GenomeAnalysisEngine toolkit) {
this.toolkit = toolkit;
}
/**
* Retrieve the toolkit, for peering into internal structures that can't
* otherwise be read. Use sparingly, and discuss uses with software engineering
@ -54,7 +64,7 @@ public abstract class Walker<MapType, ReduceType> {
* @return The genome analysis toolkit.
*/
protected GenomeAnalysisEngine getToolkit() {
return GenomeAnalysisEngine.instance;
return toolkit;
}
/**
@ -126,7 +136,7 @@ public abstract class Walker<MapType, ReduceType> {
/**
* General interval reduce routine called after all of the traversals are done
* @param results
* @param results interval reduce results
*/
public void onTraversalDone(List<Pair<GenomeLoc, ReduceType>> results) {
for ( Pair<GenomeLoc, ReduceType> result : results ) {
@ -145,6 +155,8 @@ public abstract class Walker<MapType, ReduceType> {
* However, onTraversalDone(reduce) will be called after each interval is processed.
* The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done,
* which is overloaded here to call onTraversalDone(reduce) for each location
*
* @return true if your walker wants to reduce each interval separately.
*/
public boolean isReduceByInterval() {
return false;

View File

@ -31,6 +31,7 @@ import net.sf.samtools.util.SequenceUtil;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
@ -179,7 +180,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
intervals = new IntervalFileMergingIterator( new java.io.File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
} else {
// read in the whole list of intervals for cleaning
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY);
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
intervals = locs.iterator();
}
currentInterval = intervals.hasNext() ? intervals.next() : null;

View File

@ -83,8 +83,6 @@ public class HLACallerWalker extends ReadWalker<Integer, Integer> {
@Argument(fullName = "turnOffVerboseOutput", shortName = "noVerbose", doc = "Do not output verbose probability descriptions (INFO lines) ", required = false)
protected boolean NO_VERBOSE = false;
GATKArgumentCollection args = this.getToolkit().getArguments();
// Initializing variables
HLAFileReader HLADictionaryReader = new HLAFileReader();

View File

@ -53,7 +53,7 @@ public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
}
out.println("Header:");
// enumerate the list of ROD's we've loaded
rodList = GenomeAnalysisEngine.instance.getRodDataSources();
rodList = this.getToolkit().getRodDataSources();
for (ReferenceOrderedDataSource rod : rodList) {
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType());
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile());

View File

@ -323,16 +323,17 @@ public class GenomeLocParser {
* Where each locN can be:
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
*
* @param file_name
* @param file_name interval file
* @param allowEmptyIntervalList if false, an empty interval list will return null
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
*/
public static List<GenomeLoc> intervalFileToList(final String file_name) {
public static List<GenomeLoc> intervalFileToList(final String file_name, boolean allowEmptyIntervalList) {
// try to open file
File inputFile = new File(file_name);
// check if file is empty
if (inputFile.exists() && inputFile.length() < 1) {
if (GenomeAnalysisEngine.instance.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST)
if (allowEmptyIntervalList)
return new ArrayList<GenomeLoc>();
else {
Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " +

View File

@ -76,12 +76,12 @@ public class SequenceDictionaryUtils {
}
/**
* @param validationExclusion exclusions to validation
* @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency
*/
public static boolean allowNonFatalIncompabilities() {
return GenomeAnalysisEngine.instance != null &&
( GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY ||
GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALL );
public static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY ||
validationExclusion == ValidationExclusion.TYPE.ALL );
}
/**
@ -89,12 +89,13 @@ public class SequenceDictionaryUtils {
* thrown with detailed error messages. If the engine is in permissive mode, then logger warnings are generated instead
*
* @param logger for warnings
* @param validationExclusion exclusions to validation
* @param name1 name associated with dict1
* @param dict1 the sequence dictionary dict1
* @param name2 name associated with dict2
* @param dict2 the sequence dictionary dict2
*/
public static void validateDictionaries(Logger logger, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
SequenceDictionaryCompatability type = compareDictionaries(dict1, dict2);
switch ( type ) {
case IDENTICAL:
@ -115,7 +116,7 @@ public class SequenceDictionaryUtils {
name2, elt2.getSequenceName(), elt2.getSequenceLength()),
name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities() )
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;
@ -129,7 +130,7 @@ public class SequenceDictionaryUtils {
else
ex = new UserException.LexicographicallySortedSequenceDictionary(name2, dict2);
if ( allowNonFatalIncompabilities() )
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;
@ -137,7 +138,7 @@ public class SequenceDictionaryUtils {
case OUT_OF_ORDER: {
UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities() )
if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage());
else
throw ex;

View File

@ -26,9 +26,10 @@ public class IntervalUtils {
* specifications.
*
* @param argList A list of strings containing interval data.
* @param allowEmptyIntervalList If false, null will be returned instead of an empty interval list.
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
*/
public static List<GenomeLoc> parseIntervalArguments(List<String> argList) {
public static List<GenomeLoc> parseIntervalArguments(List<String> argList, boolean allowEmptyIntervalList) {
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
@ -50,7 +51,7 @@ public class IntervalUtils {
// if it's a file, add items to raw interval list
if (isIntervalFile(fileOrInterval)) {
try {
rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval));
rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList));
}
catch (Exception e) {
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e);

View File

@ -39,7 +39,7 @@ public class GATKSAMRecord extends SAMRecord {
// These attributes exist in memory only, and are never written to disk.
private Map<Object, Object> temporaryAttributes;
public GATKSAMRecord(SAMRecord record) {
public GATKSAMRecord(SAMRecord record, boolean useOriginalBaseQualities) {
super(null); // it doesn't matter - this isn't used
if ( record == null )
throw new IllegalArgumentException("The SAMRecord argument cannot be null");
@ -55,8 +55,7 @@ public class GATKSAMRecord extends SAMRecord {
setAttribute(attribute.tag, attribute.value);
// if we are using original quals, set them now if they are present in the record
if ( GenomeAnalysisEngine.instance != null &&
GenomeAnalysisEngine.instance.getArguments().useOriginalBaseQualities ) {
if ( useOriginalBaseQualities ) {
byte[] originalQuals = mRecord.getOriginalBaseQualities();
if ( originalQuals != null )
mRecord.setBaseQualities(originalQuals);

View File

@ -50,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.<SAMReaderID>emptyList())),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
Shard shard = new LocusShard(new SAMDataSource(Collections.<SAMReaderID>emptyList()),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null);

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
@ -40,6 +39,6 @@ import java.util.Collections;
*/
public class MockLocusShard extends LocusShard {
public MockLocusShard(final List<GenomeLoc> intervals) {
super(new SAMDataSource(new ReadProperties(Collections.<SAMReaderID>emptyList())),intervals,null);
super(new SAMDataSource(Collections.<SAMReaderID>emptyList()),intervals,null);
}
}

View File

@ -86,10 +86,9 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the data
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
ReadProperties reads = new ReadProperties(readers);
// the sharding strat.
SAMDataSource data = new SAMDataSource(reads);
SAMDataSource data = new SAMDataSource(readers);
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
int count = 0;
@ -131,10 +130,9 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the test files
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
ReadProperties reads = new ReadProperties(readers);
// the sharding strat.
SAMDataSource data = new SAMDataSource(reads);
SAMDataSource data = new SAMDataSource(readers);
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
@ -173,11 +171,10 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
readers.clear();
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
reads = new ReadProperties(readers);
count = 0;
// the sharding strat.
data = new SAMDataSource(reads);
data = new SAMDataSource(readers);
strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
logger.debug("Pile two:");

View File

@ -1,11 +1,14 @@
package org.broadinstitute.sting.gatk.iterators;
import junit.framework.Assert;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -15,11 +18,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Arrays;
import java.util.*;
/**
* testing of the LocusIteratorByState
@ -41,7 +40,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// create a test version of the Reads object
ReadProperties readAttributes = new ReadProperties(new ArrayList<SAMReaderID>());
ReadProperties readAttributes = createTestReadProperties();
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
@ -93,7 +92,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
// create a test version of the Reads object
ReadProperties readAttributes = new ReadProperties(new ArrayList<SAMReaderID>());
ReadProperties readAttributes = createTestReadProperties();
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
@ -132,6 +131,21 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
}
private static ReadProperties createTestReadProperties() {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(),
new SAMFileHeader(),
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<SamRecordFilter>(),
false,
false
);
}
}
class FakeCloseableIterator<T> implements CloseableIterator<T> {

View File

@ -99,7 +99,7 @@ public class TraverseReadsUnitTest extends BaseTest {
ref = new IndexedFastaSequenceFile(refFile);
GenomeLocParser.setupRefContigOrdering(ref);
SAMDataSource dataSource = new SAMDataSource(new ReadProperties(bamList));
SAMDataSource dataSource = new SAMDataSource(bamList);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
ref.getSequenceDictionary(),
readSize);