Removed cases where various toolkit functions were accessing GenomeAnalysisEngine.instance.

This will allow other programs like Queue to reuse the functionality.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4351 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2010-09-25 02:49:30 +00:00
parent 497bcbcbb7
commit edaa278edd
27 changed files with 262 additions and 137 deletions

View File

@ -30,9 +30,6 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.datasources.sample.Sample;
@ -43,6 +40,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.filters.SamRecordHeaderFilter;
import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
@ -203,7 +201,8 @@ public abstract class AbstractGenomeAnalysisEngine {
List<GenomeLoc> additionalIntervals) { List<GenomeLoc> additionalIntervals) {
return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals, return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals,
IntervalUtils.parseIntervalArguments(argList), IntervalUtils.parseIntervalArguments(argList,
this.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST),
argCollection.BTIMergeRule), argCollection.BTIMergeRule),
mergingRule); mergingRule);
} }
@ -278,7 +277,7 @@ public abstract class AbstractGenomeAnalysisEngine {
* the caller must handle that directly. * the caller must handle that directly.
* @return A collection of available filters. * @return A collection of available filters.
*/ */
protected Collection<SamRecordFilter> createFilters() { public Collection<SamRecordFilter> createFilters() {
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>(); Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0) if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0)
filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList)); filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList));
@ -291,7 +290,11 @@ public abstract class AbstractGenomeAnalysisEngine {
logger.info("Strictness is " + argCollection.strictnessLevel); logger.info("Strictness is " + argCollection.strictnessLevel);
validateSuppliedReads(); validateSuppliedReads();
readsDataSource = createReadsDataSource(extractSourceInfo()); readsDataSource = createReadsDataSource();
for (SamRecordFilter filter : filters)
if (filter instanceof SamRecordHeaderFilter)
((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader());
validateSuppliedReference(); validateSuppliedReference();
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
@ -456,25 +459,6 @@ public abstract class AbstractGenomeAnalysisEngine {
} }
/**
* Bundles all the source information about the reads into a unified data structure.
*
* @return The reads object providing reads source info.
*/
private ReadProperties extractSourceInfo() {
DownsamplingMethod method = getDownsamplingMethod();
return new ReadProperties(unpackBAMFileList(argCollection.samFiles),
argCollection.strictnessLevel,
argCollection.readBufferSize,
method,
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
filters,
includeReadsWithDeletionAtLoci(),
generateExtendedEvents());
}
protected DownsamplingMethod getDownsamplingMethod() { protected DownsamplingMethod getDownsamplingMethod() {
DownsamplingMethod method; DownsamplingMethod method;
if(argCollection.getDownsamplingMethod() != null) if(argCollection.getDownsamplingMethod() != null)
@ -522,7 +506,7 @@ public abstract class AbstractGenomeAnalysisEngine {
} }
// compare the reads to the reference // compare the reads to the reference
SequenceDictionaryUtils.validateDictionaries(logger, "reads", readsDictionary, "reference", referenceDictionary); SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, "reads", readsDictionary, "reference", referenceDictionary);
} }
// compare the tracks to the reference, if they have a sequence dictionary // compare the tracks to the reference, if they have a sequence dictionary
@ -538,7 +522,7 @@ public abstract class AbstractGenomeAnalysisEngine {
Set<String> trackSequences = new TreeSet<String>(); Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences()) for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName()); trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, track.getName(), trackDict, "reference", referenceDictionary); SequenceDictionaryUtils.validateDictionaries(logger, getArguments().unsafe, track.getName(), trackDict, "reference", referenceDictionary);
} }
} }
@ -559,11 +543,21 @@ public abstract class AbstractGenomeAnalysisEngine {
/** /**
* Gets a data source for the given set of reads. * Gets a data source for the given set of reads.
* *
* @param reads the read source information
* @return A data source for the given set of reads. * @return A data source for the given set of reads.
*/ */
private SAMDataSource createReadsDataSource(ReadProperties reads) { private SAMDataSource createReadsDataSource() {
return new SAMDataSource(reads); DownsamplingMethod method = getDownsamplingMethod();
return new SAMDataSource(
unpackBAMFileList(argCollection.samFiles),
argCollection.useOriginalBaseQualities,
argCollection.strictnessLevel,
argCollection.readBufferSize,
method,
new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
filters,
includeReadsWithDeletionAtLoci(),
generateExtendedEvents());
} }
/** /**

View File

@ -76,6 +76,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
try { try {
engine.setArguments(getArgumentCollection()); engine.setArguments(getArgumentCollection());
engine.setWalker(walker); engine.setWalker(walker);
walker.setToolkit(engine);
Collection<SamRecordFilter> filters = engine.createFilters(); Collection<SamRecordFilter> filters = engine.createFilters();
engine.setFilters(filters); engine.setFilters(filters);
@ -165,6 +166,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
Walker walker = engine.getWalkerByName(getAnalysisName()); Walker walker = engine.getWalkerByName(getAnalysisName());
engine.setArguments(getArgumentCollection()); engine.setArguments(getArgumentCollection());
engine.setWalker(walker); engine.setWalker(walker);
walker.setToolkit(engine);
argumentSources.add(walker.getClass()); argumentSources.add(walker.getClass());
Collection<SamRecordFilter> filters = engine.createFilters(); Collection<SamRecordFilter> filters = engine.createFilters();

View File

@ -56,30 +56,13 @@ import java.util.*;
* A GenomeAnalysisEngine that runs a specified walker. * A GenomeAnalysisEngine that runs a specified walker.
*/ */
public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine { public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
// our instance of this genome analysis toolkit; it's used by other classes to extract the traversal engine
// TODO: public static without final tends to indicate we're thinking about this the wrong way
public static GenomeAnalysisEngine instance;
/** /**
* our walker manager * our walker manager
*/ */
private final WalkerManager walkerManager = new WalkerManager();; private final WalkerManager walkerManager = new WalkerManager();
private Walker<?, ?> walker; private Walker<?, ?> walker;
/**
* our constructor, where all the work is done
* <p/>
* legacy traversal types are sent to legacyTraversal function; as we move more of the traversals to the
* new MicroScheduler class we'll be able to delete that function.
*/
public GenomeAnalysisEngine() {
// make sure our instance variable points to this analysis engine
instance = this;
}
public void setWalker(Walker<?, ?> walker) { public void setWalker(Walker<?, ?> walker) {
this.walker = walker; this.walker = walker;
} }
@ -150,7 +133,7 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
* @return A collection of available filters. * @return A collection of available filters.
*/ */
@Override @Override
protected Collection<SamRecordFilter> createFilters() { public Collection<SamRecordFilter> createFilters() {
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>(); Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager())); filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager()));
filters.addAll(super.createFilters()); filters.addAll(super.createFilters());
@ -383,6 +366,6 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
for (Stub<?> stub : getOutputs()) for (Stub<?> stub : getOutputs())
outputTracker.addOutput(stub); outputTracker.addOutput(stub);
outputTracker.prepareWalker(walker); outputTracker.prepareWalker(walker, getArguments().strictnessLevel);
} }
} }

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk; package org.broadinstitute.sting.gatk;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Collection; import java.util.Collection;
/** /**
@ -27,12 +27,14 @@ import java.util.Collection;
*/ */
public class ReadProperties { public class ReadProperties {
private List<SAMReaderID> readers = null; private List<SAMReaderID> readers = null;
private SAMFileHeader header = null;
private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT; private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.STRICT;
private Integer readBufferSize = null; private Integer readBufferSize = null;
private DownsamplingMethod downsamplingMethod = null; private DownsamplingMethod downsamplingMethod = null;
private ValidationExclusion exclusionList = null; private ValidationExclusion exclusionList = null;
private Collection<SamRecordFilter> supplementalFilters = null; private Collection<SamRecordFilter> supplementalFilters = null;
private boolean includeReadsWithDeletionAtLoci = false; private boolean includeReadsWithDeletionAtLoci = false;
private boolean useOriginalBaseQualities = false;
private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels) private boolean generateExtendedEvents = false; // do we want to generate additional piles of "extended" events (indels)
// immediately after the reference base such event is associated with? // immediately after the reference base such event is associated with?
@ -66,6 +68,14 @@ public class ReadProperties {
return readers; return readers;
} }
/**
* Gets the sam file header
* @return the sam file header
*/
public SAMFileHeader getHeader() {
return header;
}
/** /**
* How strict should validation be? * How strict should validation be?
* @return Stringency of validation. * @return Stringency of validation.
@ -103,14 +113,11 @@ public class ReadProperties {
} }
/** /**
* Simple constructor for unit testing. * Return whether to use original base qualities.
* @param readsFiles List of reads files to open. * @return Whether to use original base qualities.
*/ */
public ReadProperties( List<SAMReaderID> readsFiles ) { public boolean useOriginalBaseQualities() {
this.readers = readsFiles; return useOriginalBaseQualities;
this.downsamplingMethod = DownsamplingMethod.NONE;
this.supplementalFilters = new ArrayList<SamRecordFilter>();
this.exclusionList = new ValidationExclusion();
} }
/** /**
@ -118,6 +125,8 @@ public class ReadProperties {
* files and store them in an easy-to-work-with package. Constructor * files and store them in an easy-to-work-with package. Constructor
* is package protected. * is package protected.
* @param samFiles list of reads files. * @param samFiles list of reads files.
* @param header sam file header.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param strictness Stringency of reads file parsing. * @param strictness Stringency of reads file parsing.
* @param readBufferSize Number of reads to hold in memory per BAM. * @param readBufferSize Number of reads to hold in memory per BAM.
* @param downsamplingMethod Method for downsampling reads at a given locus. * @param downsamplingMethod Method for downsampling reads at a given locus.
@ -131,6 +140,8 @@ public class ReadProperties {
* bases will be seen in the pileups, and the deletions will be skipped silently. * bases will be seen in the pileups, and the deletions will be skipped silently.
*/ */
public ReadProperties( List<SAMReaderID> samFiles, public ReadProperties( List<SAMReaderID> samFiles,
SAMFileHeader header,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness, SAMFileReader.ValidationStringency strictness,
Integer readBufferSize, Integer readBufferSize,
DownsamplingMethod downsamplingMethod, DownsamplingMethod downsamplingMethod,
@ -139,12 +150,14 @@ public class ReadProperties {
boolean includeReadsWithDeletionAtLoci, boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents) { boolean generateExtendedEvents) {
this.readers = samFiles; this.readers = samFiles;
this.header = header;
this.readBufferSize = readBufferSize; this.readBufferSize = readBufferSize;
this.validationStringency = strictness; this.validationStringency = strictness;
this.downsamplingMethod = downsamplingMethod; this.downsamplingMethod = downsamplingMethod == null ? DownsamplingMethod.NONE : downsamplingMethod;
this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList; this.exclusionList = exclusionList == null ? new ValidationExclusion() : exclusionList;
this.supplementalFilters = supplementalFilters; this.supplementalFilters = supplementalFilters;
this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
this.generateExtendedEvents = generateExtendedEvents; this.generateExtendedEvents = generateExtendedEvents;
this.useOriginalBaseQualities = useOriginalBaseQualities;
} }
} }

View File

@ -37,6 +37,11 @@ import java.util.Collection;
*/ */
public class SampleDataSource { public class SampleDataSource {
/**
* SAMFileHeader that has been created for this analysis.
*/
private final SAMFileHeader header;
/** /**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap. * this is stored as a HashMap.
@ -63,7 +68,7 @@ public class SampleDataSource {
* @param sampleFiles Sample files that were included on the command line * @param sampleFiles Sample files that were included on the command line
*/ */
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) { public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
this.header = header;
// create empty sample object for each sample referenced in the SAM header // create empty sample object for each sample referenced in the SAM header
for (String sampleName : SampleUtils.getSAMFileSamples(header)) { for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (!hasSample(sampleName)) { if (!hasSample(sampleName)) {
@ -85,7 +90,7 @@ public class SampleDataSource {
* Hallucinates sample objects for all the samples in the SAM file and stores them * Hallucinates sample objects for all the samples in the SAM file and stores them
*/ */
private void getSamplesFromSAMFile() { private void getSamplesFromSAMFile() {
for (String sampleName : SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader())) { for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (!hasSample(sampleName)) { if (!hasSample(sampleName)) {
Sample newSample = new Sample(sampleName); Sample newSample = new Sample(sampleName);
newSample.setSAMFileEntry(true); newSample.setSAMFileEntry(true);

View File

@ -32,6 +32,7 @@ import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.picard.sam.MergingSamRecordIterator; import net.sf.picard.sam.MergingSamRecordIterator;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard; import org.broadinstitute.sting.gatk.datasources.shards.BAMFormatAwareShard;
import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard; import org.broadinstitute.sting.gatk.datasources.shards.MonolithicShard;
@ -68,6 +69,11 @@ public class SAMDataSource implements SimpleDataSource {
*/ */
protected final List<SAMReaderID> readerIDs; protected final List<SAMReaderID> readerIDs;
/**
* How strict are the readers driving this data source.
*/
protected final SAMFileReader.ValidationStringency validationStringency;
/** /**
* How far along is each reader? * How far along is each reader?
*/ */
@ -104,14 +110,54 @@ public class SAMDataSource implements SimpleDataSource {
/** /**
* Create a new SAM data source given the supplied read metadata. * Create a new SAM data source given the supplied read metadata.
* @param reads The read metadata. * @param samFiles list of reads files.
*/ */
public SAMDataSource(ReadProperties reads) { public SAMDataSource(List<SAMReaderID> samFiles) {
this.readProperties = reads; this(
samFiles,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
null,
new ValidationExclusion(),
new ArrayList<SamRecordFilter>(),
false,
false
);
}
/**
* Create a new SAM data source given the supplied reads files and read-handling settings.
* @param samFiles list of reads files.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param strictness Stringency of reads file parsing.
* @param readBufferSize Number of reads to hold in memory per BAM.
* @param downsamplingMethod Method for downsampling reads at a given locus.
* @param exclusionList what safety checks we're willing to let slide
* @param supplementalFilters additional filters to dynamically apply.
* @param generateExtendedEvents if true, the engine will issue an extra call to walker's map() with
* a pile of indel/noevent extended events at every locus with at least one indel associated with it
* (in addition to a "regular" call to map() at this locus performed with base pileup)
* @param includeReadsWithDeletionAtLoci if 'true', the base pileups sent to the walker's map() method
* will explicitly list reads with deletion over the current reference base; otherwise, only observed
* bases will be seen in the pileups, and the deletions will be skipped silently.
*/
public SAMDataSource(
List<SAMReaderID> samFiles,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
Integer readBufferSize,
DownsamplingMethod downsamplingMethod,
ValidationExclusion exclusionList,
Collection<SamRecordFilter> supplementalFilters,
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents
) {
this.readMetrics = new ReadMetrics(); this.readMetrics = new ReadMetrics();
readerIDs = reads.getSAMReaderIDs(); readerIDs = samFiles;
for (SAMReaderID readerID : reads.getSAMReaderIDs()) { validationStringency = strictness;
for (SAMReaderID readerID : samFiles) {
if (!readerID.samFile.canRead()) if (!readerID.samFile.canRead())
throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " + throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions. " +
"Please check that the file is present and readable and try again."); "Please check that the file is present and readable and try again.");
@ -136,10 +182,23 @@ public class SAMDataSource implements SimpleDataSource {
initializeReaderPositions(readers); initializeReaderPositions(readers);
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true); SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
mergedHeader = headerMerger.getMergedHeader(); mergedHeader = headerMerger.getMergedHeader();
hasReadGroupCollisions = headerMerger.hasReadGroupCollisions(); hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();
readProperties = new ReadProperties(
samFiles,
mergedHeader,
useOriginalBaseQualities,
strictness,
readBufferSize,
downsamplingMethod,
exclusionList,
supplementalFilters,
includeReadsWithDeletionAtLoci,
generateExtendedEvents
);
// cache the read group id (original) -> read group id (merged) mapping. // cache the read group id (original) -> read group id (merged) mapping.
for(SAMReaderID id: readerIDs) { for(SAMReaderID id: readerIDs) {
SAMFileReader reader = readers.getReader(id); SAMFileReader reader = readers.getReader(id);
@ -371,10 +430,10 @@ public class SAMDataSource implements SimpleDataSource {
* @return An iterator over the selected data. * @return An iterator over the selected data.
*/ */
private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) { private StingSAMIterator getIterator(SAMReaders readers, BAMFormatAwareShard shard, boolean enableVerification) {
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true); SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
// Set up merging to dynamically merge together multiple BAMs. // Set up merging to dynamically merge together multiple BAMs.
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,true); MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
for(SAMReaderID id: getReaderIDs()) { for(SAMReaderID id: getReaderIDs()) {
if(shard.getFileSpans().get(id) == null) if(shard.getFileSpans().get(id) == null)
continue; continue;
@ -388,6 +447,7 @@ public class SAMDataSource implements SimpleDataSource {
return applyDecoratingIterators(shard.getReadMetrics(), return applyDecoratingIterators(shard.getReadMetrics(),
enableVerification, enableVerification,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction, readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
@ -403,13 +463,14 @@ public class SAMDataSource implements SimpleDataSource {
SAMReaders readers = resourcePool.getAvailableReaders(); SAMReaders readers = resourcePool.getAvailableReaders();
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set. // Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(readers.values(),SAMFileHeader.SortOrder.coordinate,true); SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,true); MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
for(SAMReaderID id: getReaderIDs()) for(SAMReaderID id: getReaderIDs())
mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator()); mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator());
return applyDecoratingIterators(shard.getReadMetrics(), return applyDecoratingIterators(shard.getReadMetrics(),
shard instanceof ReadShard, shard instanceof ReadShard,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)), new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction, readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION), readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
@ -433,6 +494,7 @@ public class SAMDataSource implements SimpleDataSource {
* *
* @param readMetrics metrics to track when using this iterator. * @param readMetrics metrics to track when using this iterator.
* @param enableVerification Verify the order of reads. * @param enableVerification Verify the order of reads.
* @param useOriginalBaseQualities True if original base qualities should be used.
* @param wrappedIterator the raw data source. * @param wrappedIterator the raw data source.
* @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus). * @param downsamplingFraction whether and how much to downsample the reads themselves (not at a locus).
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this. * @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
@ -441,11 +503,12 @@ public class SAMDataSource implements SimpleDataSource {
*/ */
protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics, protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics,
boolean enableVerification, boolean enableVerification,
boolean useOriginalBaseQualities,
StingSAMIterator wrappedIterator, StingSAMIterator wrappedIterator,
Double downsamplingFraction, Double downsamplingFraction,
Boolean noValidationOfReadOrder, Boolean noValidationOfReadOrder,
Collection<SamRecordFilter> supplementalFilters) { Collection<SamRecordFilter> supplementalFilters) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator); wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting // NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away // as there is no reason to sort something that we will end of throwing away
@ -530,7 +593,7 @@ public class SAMDataSource implements SimpleDataSource {
private synchronized void createNewResource() { private synchronized void createNewResource() {
if(allResources.size() > maxEntries) if(allResources.size() > maxEntries)
throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use."); throw new ReviewedStingException("Cannot create a new resource pool. All resources are in use.");
SAMReaders readers = new SAMReaders(readProperties); SAMReaders readers = new SAMReaders(readerIDs, validationStringency);
allResources.add(readers); allResources.add(readers);
availableResources.add(readers); availableResources.add(readers);
} }
@ -548,14 +611,15 @@ public class SAMDataSource implements SimpleDataSource {
/** /**
* Derive a new set of readers from the Reads metadata. * Derive a new set of readers from the Reads metadata.
* @param sourceInfo Metadata for the reads to load. * @param readerIDs reads to load.
* @param validationStringency validation stringency.
*/ */
public SAMReaders(ReadProperties sourceInfo) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: sourceInfo.getSAMReaderIDs()) { for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile); SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.enableFileSource(true); reader.enableFileSource(true);
reader.enableIndexCaching(true); reader.enableIndexCaching(true);
reader.setValidationStringency(sourceInfo.getValidationStringency()); reader.setValidationStringency(validationStringency);
// If no read group is present, hallucinate one. // If no read group is present, hallucinate one.
// TODO: Straw poll to see whether this is really required. // TODO: Straw poll to see whether this is really required.
@ -614,6 +678,17 @@ public class SAMDataSource implements SimpleDataSource {
public Collection<SAMFileReader> values() { public Collection<SAMFileReader> values() {
return readers.values(); return readers.values();
} }
/**
* Gets the file headers of all the readers in this data structure.
* @return A collection of the readers' SAM file headers.
*/
public Collection<SAMFileHeader> headers() {
ArrayList<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
for (SAMFileReader reader : values())
headers.add(reader.getFileHeader());
return headers;
}
} }
private class ReleasingIterator implements StingSAMIterator { private class ReleasingIterator implements StingSAMIterator {

View File

@ -122,7 +122,7 @@ public abstract class MicroScheduler {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
} }
traversalEngine.initialize(); traversalEngine.initialize(engine);
} }
/** /**
@ -147,7 +147,7 @@ public abstract class MicroScheduler {
* @return an iterator over the reads specified in the shard. * @return an iterator over the reads specified in the shard.
*/ */
protected StingSAMIterator getReadIterator(Shard shard) { protected StingSAMIterator getReadIterator(Shard shard) {
return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator(new ReadProperties(Collections.<SAMReaderID>emptyList())); return (!reads.isEmpty()) ? reads.seek(shard) : new NullSAMIterator();
} }
/** /**

View File

@ -36,11 +36,18 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
* @author mhanna * @author mhanna
* @version 0.1 * @version 0.1
*/ */
public class MalformedReadFilter implements SamRecordFilter { public class MalformedReadFilter implements SamRecordHeaderFilter {
private SAMFileHeader header;
@Override
public void setHeader(SAMFileHeader header) {
this.header = header;
}
public boolean filterOut(SAMRecord read) { public boolean filterOut(SAMRecord read) {
return !checkInvalidAlignmentStart(read) || return !checkInvalidAlignmentStart(read) ||
!checkInvalidAlignmentEnd(read) || !checkInvalidAlignmentEnd(read) ||
!checkAlignmentDisagreesWithHeader(GenomeAnalysisEngine.instance.getSAMFileHeader(),read) || !checkAlignmentDisagreesWithHeader(this.header,read) ||
!checkCigarDisagreesWithAlignment(read); !checkCigarDisagreesWithAlignment(read);
} }

View File

@ -0,0 +1,15 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
/**
* A SamRecordFilter that also depends on the header.
*/
public interface SamRecordHeaderFilter extends SamRecordFilter {
/**
* Sets the header for use by this filter.
* @param header the header for use by this filter.
*/
void setHeader(SAMFileHeader header);
}

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.gatk.io; package org.broadinstitute.sting.gatk.io;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentSource;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.io.storage.StorageFactory; import org.broadinstitute.sting.gatk.io.storage.StorageFactory;
@ -74,7 +74,7 @@ public abstract class OutputTracker {
*/ */
public abstract <T> T getStorage( Stub<T> stub ); public abstract <T> T getStorage( Stub<T> stub );
public void prepareWalker( Walker walker ) { public void prepareWalker( Walker walker, SAMFileReader.ValidationStringency strictnessLevel ) {
for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) { for( Map.Entry<ArgumentSource,Object> io: inputs.entrySet() ) {
ArgumentSource targetField = io.getKey(); ArgumentSource targetField = io.getKey();
Object targetValue = io.getValue(); Object targetValue = io.getValue();
@ -83,7 +83,7 @@ public abstract class OutputTracker {
// TODO: Generalize this, and move it to its own initialization step. // TODO: Generalize this, and move it to its own initialization step.
if( targetValue instanceof SAMFileReaderBuilder) { if( targetValue instanceof SAMFileReaderBuilder) {
SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue; SAMFileReaderBuilder builder = (SAMFileReaderBuilder)targetValue;
builder.setValidationStringency(GenomeAnalysisEngine.instance.getArguments().strictnessLevel); builder.setValidationStringency(strictnessLevel);
targetValue = builder.build(); targetValue = builder.build();
} }

View File

@ -273,16 +273,13 @@ public class LocusIteratorByState extends LocusIterator {
} }
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, List<LocusIteratorFilter> filters ) { public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, List<LocusIteratorFilter> filters ) {
this.readInfo = readInformation;
this.filters = filters;
// Aggregate all sample names. // Aggregate all sample names.
// TODO: Push in header via constructor sampleNames.addAll(SampleUtils.getSAMFileSamples(readInfo.getHeader()));
if(GenomeAnalysisEngine.instance != null && GenomeAnalysisEngine.instance.getDataSource() != null) {
sampleNames.addAll(SampleUtils.getSAMFileSamples(GenomeAnalysisEngine.instance.getSAMFileHeader()));
}
// Add a null sample name as a catch-all for reads without samples // Add a null sample name as a catch-all for reads without samples
if(!sampleNames.contains(null)) sampleNames.add(null); if(!sampleNames.contains(null)) sampleNames.add(null);
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleNames); readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleNames);
this.readInfo = readInformation;
this.filters = filters;
} }
public Iterator<AlignmentContext> iterator() { public Iterator<AlignmentContext> iterator() {

View File

@ -22,9 +22,7 @@ import java.util.NoSuchElementException;
* A placeholder for an iterator with no data. * A placeholder for an iterator with no data.
*/ */
public class NullSAMIterator implements StingSAMIterator { public class NullSAMIterator implements StingSAMIterator {
private ReadProperties reads = null; public NullSAMIterator() {}
public NullSAMIterator( ReadProperties reads ) { this.reads = reads; }
public Iterator<SAMRecord> iterator() { return this; } public Iterator<SAMRecord> iterator() { return this; }
public void close() { /* NO-OP */ } public void close() { /* NO-OP */ }

View File

@ -27,12 +27,19 @@ public class ReadFormattingIterator implements StingSAMIterator {
*/ */
private StingSAMIterator wrappedIterator; private StingSAMIterator wrappedIterator;
/**
* True if original base qualities should be used.
*/
private final boolean useOriginalBaseQualities;
/** /**
* Decorate the given iterator inside a ReadWrappingIterator. * Decorate the given iterator inside a ReadWrappingIterator.
* @param wrappedIterator iterator * @param wrappedIterator iterator
* @param useOriginalBaseQualities true if original base qualities should be used
*/ */
public ReadFormattingIterator(StingSAMIterator wrappedIterator) { public ReadFormattingIterator(StingSAMIterator wrappedIterator, boolean useOriginalBaseQualities) {
this.wrappedIterator = wrappedIterator; this.wrappedIterator = wrappedIterator;
this.useOriginalBaseQualities = useOriginalBaseQualities;
} }
/** /**
@ -67,7 +74,7 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists. * no next exists.
*/ */
public SAMRecord next() { public SAMRecord next() {
return new GATKSAMRecord(wrappedIterator.next()); return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities);
} }
/** /**

View File

@ -24,6 +24,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
protected static Logger logger = Logger.getLogger(TraversalEngine.class); protected static Logger logger = Logger.getLogger(TraversalEngine.class);
private GenomeAnalysisEngine engine;
/** /**
* Gets the named traversal type associated with the given traversal. * Gets the named traversal type associated with the given traversal.
* @return A user-friendly name for the given traversal type. * @return A user-friendly name for the given traversal type.
@ -48,7 +50,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
public void printProgress(Shard shard,GenomeLoc loc) { public void printProgress(Shard shard,GenomeLoc loc) {
// A bypass is inserted here for unit testing. // A bypass is inserted here for unit testing.
// TODO: print metrics outside of the traversal engine to more easily handle cumulative stats. // TODO: print metrics outside of the traversal engine to more easily handle cumulative stats.
ReadMetrics cumulativeMetrics = GenomeAnalysisEngine.instance != null ? GenomeAnalysisEngine.instance.getCumulativeMetrics().clone() : new ReadMetrics(); ReadMetrics cumulativeMetrics = engine != null ? engine.getCumulativeMetrics().clone() : new ReadMetrics();
cumulativeMetrics.incrementMetrics(shard.getReadMetrics()); cumulativeMetrics.incrementMetrics(shard.getReadMetrics());
printProgress(loc, cumulativeMetrics, false); printProgress(loc, cumulativeMetrics, false);
} }
@ -103,8 +105,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
} }
} }
/** Initialize the traversal engine. After this point traversals can be run over the data */ /**
public void initialize() { * Initialize the traversal engine. After this point traversals can be run over the data
* @param engine GenomeAnalysisEngine for this traversal
*/
public void initialize(GenomeAnalysisEngine engine) {
this.engine = engine;
lastProgressPrintTime = startTime = System.currentTimeMillis(); lastProgressPrintTime = startTime = System.currentTimeMillis();
} }

View File

@ -43,10 +43,20 @@ import org.apache.log4j.Logger;
@ReadFilters(MalformedReadFilter.class) @ReadFilters(MalformedReadFilter.class)
public abstract class Walker<MapType, ReduceType> { public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class); final protected static Logger logger = Logger.getLogger(Walker.class);
private GenomeAnalysisEngine toolkit;
protected Walker() { protected Walker() {
} }
/**
* Set the toolkit, for peering into internal structures that can't
* otherwise be read.
* @param toolkit The genome analysis toolkit.
*/
public void setToolkit(GenomeAnalysisEngine toolkit) {
this.toolkit = toolkit;
}
/** /**
* Retrieve the toolkit, for peering into internal structures that can't * Retrieve the toolkit, for peering into internal structures that can't
* otherwise be read. Use sparingly, and discuss uses with software engineering * otherwise be read. Use sparingly, and discuss uses with software engineering
@ -54,7 +64,7 @@ public abstract class Walker<MapType, ReduceType> {
* @return The genome analysis toolkit. * @return The genome analysis toolkit.
*/ */
protected GenomeAnalysisEngine getToolkit() { protected GenomeAnalysisEngine getToolkit() {
return GenomeAnalysisEngine.instance; return toolkit;
} }
/** /**
@ -126,7 +136,7 @@ public abstract class Walker<MapType, ReduceType> {
/** /**
* General interval reduce routine called after all of the traversals are done * General interval reduce routine called after all of the traversals are done
* @param results * @param results interval reduce results
*/ */
public void onTraversalDone(List<Pair<GenomeLoc, ReduceType>> results) { public void onTraversalDone(List<Pair<GenomeLoc, ReduceType>> results) {
for ( Pair<GenomeLoc, ReduceType> result : results ) { for ( Pair<GenomeLoc, ReduceType> result : results ) {
@ -145,6 +155,8 @@ public abstract class Walker<MapType, ReduceType> {
* However, onTraversalDone(reduce) will be called after each interval is processed. * However, onTraversalDone(reduce) will be called after each interval is processed.
* The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done, * The system will call onTraversalDone( GenomeLoc -> reduce ), after all reductions are done,
* which is overloaded here to call onTraversalDone(reduce) for each location * which is overloaded here to call onTraversalDone(reduce) for each location
*
* @return true if your walker wants to reduce each interval separately.
*/ */
public boolean isReduceByInterval() { public boolean isReduceByInterval() {
return false; return false;

View File

@ -31,6 +31,7 @@ import net.sf.samtools.util.SequenceUtil;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
@ -179,7 +180,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
intervals = new IntervalFileMergingIterator( new java.io.File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY ); intervals = new IntervalFileMergingIterator( new java.io.File(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY );
} else { } else {
// read in the whole list of intervals for cleaning // read in the whole list of intervals for cleaning
GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile)), IntervalMergingRule.OVERLAPPING_ONLY); GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(IntervalUtils.parseIntervalArguments(Arrays.asList(intervalsFile),this.getToolkit().getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST), IntervalMergingRule.OVERLAPPING_ONLY);
intervals = locs.iterator(); intervals = locs.iterator();
} }
currentInterval = intervals.hasNext() ? intervals.next() : null; currentInterval = intervals.hasNext() ? intervals.next() : null;

View File

@ -83,8 +83,6 @@ public class HLACallerWalker extends ReadWalker<Integer, Integer> {
@Argument(fullName = "turnOffVerboseOutput", shortName = "noVerbose", doc = "Do not output verbose probability descriptions (INFO lines) ", required = false) @Argument(fullName = "turnOffVerboseOutput", shortName = "noVerbose", doc = "Do not output verbose probability descriptions (INFO lines) ", required = false)
protected boolean NO_VERBOSE = false; protected boolean NO_VERBOSE = false;
GATKArgumentCollection args = this.getToolkit().getArguments();
// Initializing variables // Initializing variables
HLAFileReader HLADictionaryReader = new HLAFileReader(); HLAFileReader HLADictionaryReader = new HLAFileReader();

View File

@ -53,7 +53,7 @@ public class RodSystemValidationWalker extends RodWalker<Integer,Integer> {
} }
out.println("Header:"); out.println("Header:");
// enumerate the list of ROD's we've loaded // enumerate the list of ROD's we've loaded
rodList = GenomeAnalysisEngine.instance.getRodDataSources(); rodList = this.getToolkit().getRodDataSources();
for (ReferenceOrderedDataSource rod : rodList) { for (ReferenceOrderedDataSource rod : rodList) {
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType()); out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getType());
out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile()); out.println(rod.getName() + DIVIDER + rod.getReferenceOrderedData().getFile());

View File

@ -323,16 +323,17 @@ public class GenomeLocParser {
* Where each locN can be: * Where each locN can be:
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000' * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
* *
* @param file_name * @param file_name interval file
* @param allowEmptyIntervalList if false empty interval lists will return null
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file * @return List<GenomeLoc> List of Genome Locs that have been parsed from file
*/ */
public static List<GenomeLoc> intervalFileToList(final String file_name) { public static List<GenomeLoc> intervalFileToList(final String file_name, boolean allowEmptyIntervalList) {
// try to open file // try to open file
File inputFile = new File(file_name); File inputFile = new File(file_name);
// check if file is empty // check if file is empty
if (inputFile.exists() && inputFile.length() < 1) { if (inputFile.exists() && inputFile.length() < 1) {
if (GenomeAnalysisEngine.instance.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST) if (allowEmptyIntervalList)
return new ArrayList<GenomeLoc>(); return new ArrayList<GenomeLoc>();
else { else {
Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " + Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " +

View File

@ -76,12 +76,12 @@ public class SequenceDictionaryUtils {
} }
/** /**
* @param validationExclusion exclusions to validation
* @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency * @return Returns true if the engine is in tolerant mode and we'll let through dangerous but not fatal dictionary inconsistency
*/ */
public static boolean allowNonFatalIncompabilities() { public static boolean allowNonFatalIncompabilities(ValidationExclusion.TYPE validationExclusion) {
return GenomeAnalysisEngine.instance != null && return ( validationExclusion == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY ||
( GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALLOW_SEQ_DICT_INCOMPATIBILITY || validationExclusion == ValidationExclusion.TYPE.ALL );
GenomeAnalysisEngine.instance.getArguments().unsafe == ValidationExclusion.TYPE.ALL );
} }
/** /**
@ -89,12 +89,13 @@ public class SequenceDictionaryUtils {
* thrown with detailed error messages. If the engine is in permissive mode, then logger.warnings of generated instead * thrown with detailed error messages. If the engine is in permissive mode, then logger.warnings of generated instead
* *
* @param logger for warnings * @param logger for warnings
* @param validationExclusion exclusions to validation
* @param name1 name associated with dict1 * @param name1 name associated with dict1
* @param dict1 the sequence dictionary dict1 * @param dict1 the sequence dictionary dict1
* @param name2 name associated with dict2 * @param name2 name associated with dict2
* @param dict2 the sequence dictionary dict2 * @param dict2 the sequence dictionary dict2
*/ */
public static void validateDictionaries(Logger logger, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) { public static void validateDictionaries(Logger logger, ValidationExclusion.TYPE validationExclusion, String name1, SAMSequenceDictionary dict1, String name2, SAMSequenceDictionary dict2) {
SequenceDictionaryCompatability type = compareDictionaries(dict1, dict2); SequenceDictionaryCompatability type = compareDictionaries(dict1, dict2);
switch ( type ) { switch ( type ) {
case IDENTICAL: case IDENTICAL:
@ -115,7 +116,7 @@ public class SequenceDictionaryUtils {
name2, elt2.getSequenceName(), elt2.getSequenceLength()), name2, elt2.getSequenceName(), elt2.getSequenceLength()),
name1, dict1, name2, dict2); name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities() ) if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage()); logger.warn(ex.getMessage());
else else
throw ex; throw ex;
@ -129,7 +130,7 @@ public class SequenceDictionaryUtils {
else else
ex = new UserException.LexicographicallySortedSequenceDictionary(name2, dict2); ex = new UserException.LexicographicallySortedSequenceDictionary(name2, dict2);
if ( allowNonFatalIncompabilities() ) if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage()); logger.warn(ex.getMessage());
else else
throw ex; throw ex;
@ -137,7 +138,7 @@ public class SequenceDictionaryUtils {
case OUT_OF_ORDER: { case OUT_OF_ORDER: {
UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2); UserException ex = new UserException.IncompatibleSequenceDictionaries("Order of contigs differences, which is unsafe", name1, dict1, name2, dict2);
if ( allowNonFatalIncompabilities() ) if ( allowNonFatalIncompabilities(validationExclusion) )
logger.warn(ex.getMessage()); logger.warn(ex.getMessage());
else else
throw ex; throw ex;

View File

@ -26,9 +26,10 @@ public class IntervalUtils {
* specifications. * specifications.
* *
* @param argList A list of strings containing interval data. * @param argList A list of strings containing interval data.
* @param allowEmptyIntervalList If false instead of an empty interval list will return null.
* @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used. * @return an unsorted, unmerged representation of the given intervals. Null is used to indicate that all intervals should be used.
*/ */
public static List<GenomeLoc> parseIntervalArguments(List<String> argList) { public static List<GenomeLoc> parseIntervalArguments(List<String> argList, boolean allowEmptyIntervalList) {
List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs
if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to if (argList != null) { // now that we can be in this function if only the ROD-to-Intervals was provided, we need to
@ -50,7 +51,7 @@ public class IntervalUtils {
// if it's a file, add items to raw interval list // if it's a file, add items to raw interval list
if (isIntervalFile(fileOrInterval)) { if (isIntervalFile(fileOrInterval)) {
try { try {
rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval)); rawIntervals.addAll(GenomeLocParser.intervalFileToList(fileOrInterval, allowEmptyIntervalList));
} }
catch (Exception e) { catch (Exception e) {
throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e); throw new UserException.MalformedFile(fileOrInterval, "Interval file could not be parsed in either format.", e);

View File

@ -39,7 +39,7 @@ public class GATKSAMRecord extends SAMRecord {
// These attributes exist in memory only, and are never written to disk. // These attributes exist in memory only, and are never written to disk.
private Map<Object, Object> temporaryAttributes; private Map<Object, Object> temporaryAttributes;
public GATKSAMRecord(SAMRecord record) { public GATKSAMRecord(SAMRecord record, boolean useOriginalBaseQualities) {
super(null); // it doesn't matter - this isn't used super(null); // it doesn't matter - this isn't used
if ( record == null ) if ( record == null )
throw new IllegalArgumentException("The SAMRecord argument cannot be null"); throw new IllegalArgumentException("The SAMRecord argument cannot be null");
@ -55,8 +55,7 @@ public class GATKSAMRecord extends SAMRecord {
setAttribute(attribute.tag, attribute.value); setAttribute(attribute.tag, attribute.value);
// if we are using original quals, set them now if t hey are present in the record // if we are using original quals, set them now if t hey are present in the record
if ( GenomeAnalysisEngine.instance != null && if ( useOriginalBaseQualities ) {
GenomeAnalysisEngine.instance.getArguments().useOriginalBaseQualities ) {
byte[] originalQuals = mRecord.getOriginalBaseQualities(); byte[] originalQuals = mRecord.getOriginalBaseQualities();
if ( originalQuals != null ) if ( originalQuals != null )
mRecord.setBaseQualities(originalQuals); mRecord.setBaseQualities(originalQuals);

View File

@ -50,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(); SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5); GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(new SAMDataSource(new ReadProperties(Collections.<SAMReaderID>emptyList())),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap()); Shard shard = new LocusShard(new SAMDataSource(Collections.<SAMReaderID>emptyList()),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS); WindowMaker windowMaker = new WindowMaker(shard,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker.WindowMakerIterator window = windowMaker.next(); WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null); LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, window.getLocus(), window, null, null);

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.datasources.shards; package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
@ -40,6 +39,6 @@ import java.util.Collections;
*/ */
public class MockLocusShard extends LocusShard { public class MockLocusShard extends LocusShard {
public MockLocusShard(final List<GenomeLoc> intervals) { public MockLocusShard(final List<GenomeLoc> intervals) {
super(new SAMDataSource(new ReadProperties(Collections.<SAMReaderID>emptyList())),intervals,null); super(new SAMDataSource(Collections.<SAMReaderID>emptyList()),intervals,null);
} }
} }

View File

@ -86,10 +86,9 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the data // setup the data
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList())); readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
ReadProperties reads = new ReadProperties(readers);
// the sharding strat. // the sharding strat.
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(readers);
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
int count = 0; int count = 0;
@ -131,10 +130,9 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
// setup the test files // setup the test files
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList())); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
ReadProperties reads = new ReadProperties(readers);
// the sharding strat. // the sharding strat.
SAMDataSource data = new SAMDataSource(reads); SAMDataSource data = new SAMDataSource(readers);
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>(); ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
@ -173,11 +171,10 @@ public class SAMBAMDataSourceUnitTest extends BaseTest {
readers.clear(); readers.clear();
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList())); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList())); readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),Collections.<String>emptyList()));
reads = new ReadProperties(readers);
count = 0; count = 0;
// the sharding strat. // the sharding strat.
data = new SAMDataSource(reads); data = new SAMDataSource(readers);
strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000); strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000);
logger.debug("Pile two:"); logger.debug("Pile two:");

View File

@ -1,11 +1,14 @@
package org.broadinstitute.sting.gatk.iterators; package org.broadinstitute.sting.gatk.iterators;
import junit.framework.Assert; import junit.framework.Assert;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -15,11 +18,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import java.io.File; import java.util.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Arrays;
/** /**
* testing of the LocusIteratorByState * testing of the LocusIteratorByState
@ -41,7 +40,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'}; final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
// create a test version of the Reads object // create a test version of the Reads object
ReadProperties readAttributes = new ReadProperties(new ArrayList<SAMReaderID>()); ReadProperties readAttributes = createTestReadProperties();
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
@ -93,7 +92,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20}; final byte[] quals = new byte[] { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
// create a test version of the Reads object // create a test version of the Reads object
ReadProperties readAttributes = new ReadProperties(new ArrayList<SAMReaderID>()); ReadProperties readAttributes = createTestReadProperties();
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true); JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10); SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
@ -132,6 +131,21 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup); Assert.assertTrue("Extended event pileup not found",foundExtendedEventPileup);
} }
/**
 * Builds a minimal {@code ReadProperties} for unit tests: no input readers,
 * a fresh empty SAM header, strict validation, and no read filters.
 * <p>
 * NOTE(review): the constructor arguments are positional and their meanings
 * are inferred only from their types here — confirm each against the
 * {@code ReadProperties} constructor declaration before relying on them.
 */
private static ReadProperties createTestReadProperties() {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(), // no input SAM readers
new SAMFileHeader(), // empty merged header
false, // boolean flag — presumably an engine option (e.g. header/sort enforcement); TODO confirm
SAMFileReader.ValidationStringency.STRICT, // fail fast on malformed records in tests
null, // TODO confirm parameter meaning — likely "none"/disabled (e.g. downsampling method)
null, // TODO confirm parameter meaning — likely "none"/disabled
new ValidationExclusion(), // default: no validation exclusions
new ArrayList<SamRecordFilter>(), // no read filters applied
false, // boolean flag — meaning not visible from this chunk; TODO confirm
false // boolean flag — meaning not visible from this chunk; TODO confirm
);
}
} }
class FakeCloseableIterator<T> implements CloseableIterator<T> { class FakeCloseableIterator<T> implements CloseableIterator<T> {

View File

@ -99,7 +99,7 @@ public class TraverseReadsUnitTest extends BaseTest {
ref = new IndexedFastaSequenceFile(refFile); ref = new IndexedFastaSequenceFile(refFile);
GenomeLocParser.setupRefContigOrdering(ref); GenomeLocParser.setupRefContigOrdering(ref);
SAMDataSource dataSource = new SAMDataSource(new ReadProperties(bamList)); SAMDataSource dataSource = new SAMDataSource(bamList);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL, ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref,ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
ref.getSequenceDictionary(), ref.getSequenceDictionary(),
readSize); readSize);