diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c58e67f0d..0048bfac1 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -26,11 +26,9 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.reference.ReferenceSequenceFile; -import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.picard.sam.SamFileHeaderMerger; import net.sf.picard.filter.SamRecordFilter; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.*; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; @@ -40,7 +38,6 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.utils.*; @@ -58,9 +55,20 @@ public class GenomeAnalysisEngine { // TODO: public static without final tends to indicate we're thinking about this the wrong way public static GenomeAnalysisEngine instance; - // our traversal engine - private TraversalEngine engine = null; - private SAMDataSource dataSource = null; + /** + * Accessor for sharded read data. + */ + private SAMDataSource readsDataSource = null; + + /** + * Accessor for sharded reference data. + */ + private IndexedFastaSequenceFile referenceDataSource = null; + + /** + * Accessor for sharded reference-ordered data. + */ + private List rodDataSources; // our argument collection private GATKArgumentCollection argCollection; @@ -105,55 +113,24 @@ public class GenomeAnalysisEngine { // save our argument parameter this.argCollection = args; - // our reference ordered data collection - List> rods = new ArrayList>(); - - // - // please don't use these in the future, use the new syntax <- if we're not using these please remove them - // - if (argCollection.DBSNPFile != null) bindConvenienceRods("dbSNP", "dbsnp", argCollection.DBSNPFile); - if (argCollection.HAPMAPFile != null) - bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile); - if (argCollection.HAPMAPChipFile != null) - bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile); - // TODO: The ROD iterator currently does not understand multiple intervals file. Fix this by cleaning the ROD system. - if (argCollection.intervals != null && argCollection.intervals.size() == 1) { - bindConvenienceRods("interval", "Intervals", argCollection.intervals.get(0).replaceAll(",", "")); - } - - // parse out the rod bindings - ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); - - // Validate the walker inputs against the walker. - validateInputsAgainstWalker(my_walker, argCollection, rods); + // Prepare the data for traversal. + initializeDataSources( my_walker, argCollection ); // our microscheduler, which is in charge of running everything - MicroScheduler microScheduler = createMicroscheduler(my_walker, rods); + MicroScheduler microScheduler = createMicroscheduler(my_walker); // create the output streams initializeOutputStreams(my_walker); - // Prepare the sort ordering w.r.t. the sequence dictionary - if (argCollection.referenceFile != null) { - final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile); - GenomeLocParser.setupRefContigOrdering(refFile); - } - - logger.info("Strictness is " + argCollection.strictnessLevel); - - // perform validation steps that are common to all the engines - engine.setMaximumIterations(argCollection.maximumEngineIterations); - engine.initialize(); - GenomeLocSortedSet locs = null; if (argCollection.intervals != null) { locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); } - ShardStrategy shardStrategy = this.getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations); + ShardStrategy shardStrategy = getShardStrategy(my_walker, microScheduler.getReference(), locs, argCollection.maximumEngineIterations); // execute the microscheduler, storing the results - return microScheduler.execute(my_walker, shardStrategy); + return microScheduler.execute(my_walker, shardStrategy, argCollection.maximumEngineIterations); } /** @@ -182,28 +159,49 @@ public class GenomeAnalysisEngine { return walkerManager.createWalkerByName(walkerName); } + private void initializeDataSources( Walker my_walker, GATKArgumentCollection argCollection ) { + validateSuppliedReadsAgainstWalker( my_walker, argCollection ); + logger.info("Strictness is " + argCollection.strictnessLevel); + readsDataSource = createReadsDataSource(extractSourceInfo(my_walker,argCollection)); + + validateSuppliedReferenceAgainstWalker( my_walker, argCollection ); + referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); + + validateReadsAndReferenceAreCompatible(readsDataSource, referenceDataSource); + + // our reference ordered data collection + List> rods = new ArrayList>(); + + // + // please don't use these in the future, use the new syntax <- if we're not using these please remove them + // + if (argCollection.DBSNPFile != null) bindConvenienceRods("dbSNP", "dbsnp", argCollection.DBSNPFile); + if (argCollection.HAPMAPFile != null) + bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", argCollection.HAPMAPFile); + if (argCollection.HAPMAPChipFile != null) + bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile); + // TODO: The ROD iterator currently does not understand multiple intervals file. Fix this by cleaning the ROD system. + if (argCollection.intervals != null && argCollection.intervals.size() == 1) { + bindConvenienceRods("interval", "Intervals", argCollection.intervals.get(0).replaceAll(",", "")); + } + + // parse out the rod bindings + ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); + + validateSuppliedReferenceOrderedDataAgainstWalker( my_walker, rods ); + + rodDataSources = getReferenceOrderedDataSources(rods); + } /** * setup a microscheduler - * * @param my_walker our walker of type LocusWalker - * @param rods the reference order data - * * @return a new microscheduler */ - private MicroScheduler createMicroscheduler(Walker my_walker, List> rods) { + private MicroScheduler createMicroscheduler(Walker my_walker) { // the mircoscheduler to return MicroScheduler microScheduler = null; - SAMDataSource readsDataSource = createReadsDataSource(extractSourceInfo(my_walker,argCollection)); - IndexedFastaSequenceFile referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); - List rodDataSources = getReferenceOrderedDataSources(rods); - - GenomeLocSortedSet locs = null; - if (argCollection.intervals != null) { - locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); - } - // we need to verify different parameter based on the walker type if (my_walker instanceof LocusWalker || my_walker instanceof LocusWindowWalker) { // create the MicroScheduler @@ -216,9 +214,6 @@ public class GenomeAnalysisEngine { Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type " + argCollection.analysisName)); } - dataSource = microScheduler.getSAMDataSource(); - engine = microScheduler.getTraversalEngine(); - return microScheduler; } @@ -351,23 +346,43 @@ public class GenomeAnalysisEngine { filters ); } - private void validateInputsAgainstWalker(Walker walker, - GATKArgumentCollection arguments, - List> rods) { - String walkerName = WalkerManager.getWalkerName(walker.getClass()); - + /** + * Verifies that the supplied set of reads files mesh with what the walker says it requires. + * @param walker Walker to test. + * @param arguments Supplied reads files. + */ + private void validateSuppliedReadsAgainstWalker( Walker walker, GATKArgumentCollection arguments ) { // Check what the walker says is required against what was provided on the command line. if (WalkerManager.isRequired(walker, DataSource.READS) && (arguments.samFiles == null || arguments.samFiles.size() == 0)) - throw new ArgumentException(String.format("Walker %s requires reads but none were provided. If this is incorrect, alter the walker's @Requires annotation.", walkerName)); - if (WalkerManager.isRequired(walker, DataSource.REFERENCE) && arguments.referenceFile == null) - throw new ArgumentException(String.format("Walker %s requires a reference but none was provided. If this is incorrect, alter the walker's @Requires annotation.", walkerName)); + throw new ArgumentException("Walker requires reads but none were provided. If this is incorrect, alter the walker's @Requires annotation."); // Check what the walker says is allowed against what was provided on the command line. if ((arguments.samFiles != null && arguments.samFiles.size() > 0) && !WalkerManager.isAllowed(walker, DataSource.READS)) - throw new ArgumentException(String.format("Walker %s does not allow reads but reads were provided. If this is incorrect, alter the walker's @Allows annotation", walkerName)); - if (arguments.referenceFile != null && !WalkerManager.isAllowed(walker, DataSource.REFERENCE)) - throw new ArgumentException(String.format("Walker %s does not allow a reference but one was provided. If this is incorrect, alter the walker's @Allows annotation", walkerName)); + throw new ArgumentException("Walker does not allow reads but reads were provided. If this is incorrect, alter the walker's @Allows annotation"); + } + /** + * Verifies that the supplied reference file mesh with what the walker says it requires. + * @param walker Walker to test. + * @param arguments Supplied reads files. + */ + private void validateSuppliedReferenceAgainstWalker( Walker walker, GATKArgumentCollection arguments ) { + // Check what the walker says is required against what was provided on the command line. + if (WalkerManager.isRequired(walker, DataSource.REFERENCE) && arguments.referenceFile == null) + throw new ArgumentException("Walker requires a reference but none was provided. If this is incorrect, alter the walker's @Requires annotation."); + + // Check what the walker says is allowed against what was provided on the command line. + if (arguments.referenceFile != null && !WalkerManager.isAllowed(walker, DataSource.REFERENCE)) + throw new ArgumentException("Walker does not allow a reference but one was provided. If this is incorrect, alter the walker's @Allows annotation"); + } + + /** + * Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not + * 'allowed' is still present. + * @param walker Walker to test. + * @param rods Reference-ordered data to load. + */ + private void validateSuppliedReferenceOrderedDataAgainstWalker( Walker walker, List> rods ) { // Check to make sure that all required metadata is present. List allRequired = WalkerManager.getRequiredMetaData(walker); for (RMD required : allRequired) { @@ -383,10 +398,73 @@ public class GenomeAnalysisEngine { // Check to see that no forbidden rods are present. for (ReferenceOrderedData rod : rods) { if (!WalkerManager.isAllowed(walker, rod)) - throw new ArgumentException(String.format("Walker does not allow access to metadata: %s. If this is correct, change the @Allows metadata", rod.getName())); + throw new ArgumentException(String.format("Walker does not allow access to metadata: %s. If this is incorrect, change the @Allows metadata", rod.getName())); } } + /** + * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference. + * @param reads Reads data source. + * @param reference Reference data source. + */ + private void validateReadsAndReferenceAreCompatible( SAMDataSource reads, ReferenceSequenceFile reference ) { + if( reads == null || reference == null ) + return; + + // Compile a set of sequence names that exist in the BAM files. + SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary(); + + Set readsSequenceNames = new TreeSet(); + for( SAMSequenceRecord dictionaryEntry: readsDictionary.getSequences() ) + readsSequenceNames.add(dictionaryEntry.getSequenceName()); + + // Compile a set of sequence names that exist in the reference file. + SAMSequenceDictionary referenceDictionary = reference.getSequenceDictionary(); + + Set referenceSequenceNames = new TreeSet(); + for( SAMSequenceRecord dictionaryEntry: referenceDictionary.getSequences() ) + referenceSequenceNames.add(dictionaryEntry.getSequenceName()); + + if( readsSequenceNames.size() == 0 ) { + logger.info("Reads file is unmapped. Skipping validation against reference."); + return; + } + + // If there's no overlap between reads and reference, data will be bogus. Throw an exception. + Set intersectingSequenceNames = new HashSet(readsSequenceNames); + intersectingSequenceNames.retainAll(referenceSequenceNames); + if( intersectingSequenceNames.size() == 0 ) { + StringBuilder error = new StringBuilder(); + error.append("No overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n"); + error.append(System.getProperty("line.separator")); + error.append(String.format("Reads contigs: %s%n", prettyPrintSequenceRecords(readsDictionary))); + error.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary))); + logger.error(error.toString()); + Utils.scareUser("No overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference."); + } + + // If the two datasets are not equal and neither is a strict subset of the other, warn the user. + if( !readsSequenceNames.equals(referenceSequenceNames) && + !readsSequenceNames.containsAll(referenceSequenceNames) && + !referenceSequenceNames.containsAll(readsSequenceNames)) { + StringBuilder warning = new StringBuilder(); + warning.append("Limited overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n"); + warning.append(System.getProperty("line.separator")); + warning.append(String.format("Reads contigs: %s%n", prettyPrintSequenceRecords(readsDictionary))); + warning.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary))); + logger.warn(warning.toString()); + } + } + + private String prettyPrintSequenceRecords( SAMSequenceDictionary sequenceDictionary ) { + String[] sequenceRecordNames = new String[ sequenceDictionary.size() ]; + int sequenceRecordIndex = 0; + for( SAMSequenceRecord sequenceRecord: sequenceDictionary.getSequences() ) + sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName(); + return Arrays.deepToString(sequenceRecordNames); + } + + /** * Convenience function that binds RODs using the old-style command line parser to the new style list for * a uniform processing. @@ -532,8 +610,8 @@ public class GenomeAnalysisEngine { return outputTracker; } - public TraversalEngine getEngine() { - return this.engine; + public SAMFileHeader getSAMFileHeader() { + return readsDataSource.getHeader(); } /** @@ -542,7 +620,7 @@ public class GenomeAnalysisEngine { * @return */ public SAMDataSource getDataSource() { - return this.dataSource; + return this.readsDataSource; } /** diff --git a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index e78c7475c..547d8eb5a 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -95,11 +95,15 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar } } - public Object execute( Walker walker, ShardStrategy shardStrategy ) { + public Object execute( Walker walker, ShardStrategy shardStrategy, int maxIterations ) { // Fast fail for walkers not supporting TreeReducible interface. if (!( walker instanceof TreeReducible )) throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers"); + // Having maxiterations in the execute method is a holdover from the old TraversalEngine days. + // Lets do something else with this. + traversalEngine.setMaximumIterations(maxIterations); + ReduceTree reduceTree = new ReduceTree(this); walker.initialize(); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index f2c2c8acc..d318ab861 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -6,10 +6,6 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.util.Collection; @@ -35,7 +31,11 @@ public class LinearMicroScheduler extends MicroScheduler { * @param walker Computation to perform over dataset. * @param shardStrategy A strategy for sharding the data. */ - public Object execute(Walker walker, ShardStrategy shardStrategy) { + public Object execute(Walker walker, ShardStrategy shardStrategy, int maxIterations) { + // Having maxiterations in the execute method is a holdover from the old TraversalEngine days. + // Lets do something else with this. + traversalEngine.setMaximumIterations(maxIterations); + walker.initialize(); Accumulator accumulator = Accumulator.create(walker); diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index 975daff81..32f3af31f 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -25,9 +25,6 @@ package org.broadinstitute.sting.gatk.executive; -import net.sf.picard.reference.ReferenceSequenceFile; -import net.sf.samtools.SAMSequenceDictionary; -import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; @@ -36,7 +33,6 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import java.util.*; @@ -91,6 +87,10 @@ public abstract class MicroScheduler { * @param rods the rods to include in the traversal */ protected MicroScheduler(Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection rods) { + this.reads = reads; + this.reference = reference; + this.rods = rods; + if (walker instanceof ReadWalker) { traversalEngine = new TraverseReads(); } else if (walker instanceof LocusWalker) { @@ -101,30 +101,11 @@ public abstract class MicroScheduler { traversalEngine = new TraverseDuplicates(); } else { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); - } - this.reads = reads; - this.reference = reference; - this.rods = rods; + } - validate(this.reads,this.reference); - - // Side effect: initialize the traversal engine with reads data. - // TODO: Give users a dedicated way of getting the header so that the MicroScheduler - // doesn't have to bend over backward providing legacy getters and setters. - traversalEngine.setSAMHeader(reads.getHeader()); traversalEngine.initialize(); } - /** - * A temporary getter for the traversal engine. In the future, clients - * of the microscheduler shouldn't need to know anything about the traversal engine. - * - * @return The traversal engine. - */ - public TraversalEngine getTraversalEngine() { - return traversalEngine; - } - /** * Walks a walker over the given list of intervals. * @@ -133,7 +114,7 @@ public abstract class MicroScheduler { * * @return the return type of the walker */ - public abstract Object execute(Walker walker, ShardStrategy shardStrategy); + public abstract Object execute(Walker walker, ShardStrategy shardStrategy, int iterations ); /** @@ -174,68 +155,4 @@ public abstract class MicroScheduler { * @return The reference maintained by this scheduler. */ public IndexedFastaSequenceFile getReference() { return reference; } - - /** - * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference. - * TODO: Doing this in the MicroScheduler is a bit late, but this is where data sources are initialized. - * TODO: Move the initialization of data sources back to the GenomeAnalysisEngine. - * @param reads Reads data source. - * @param reference Reference data source. - */ - private void validate( SAMDataSource reads, ReferenceSequenceFile reference ) { - if( reads == null || reference == null ) - return; - - // Compile a set of sequence names that exist in the BAM files. - SAMSequenceDictionary readsDictionary = reads.getHeader().getSequenceDictionary(); - - Set readsSequenceNames = new TreeSet(); - for( SAMSequenceRecord dictionaryEntry: readsDictionary.getSequences() ) - readsSequenceNames.add(dictionaryEntry.getSequenceName()); - - // Compile a set of sequence names that exist in the reference file. - SAMSequenceDictionary referenceDictionary = reference.getSequenceDictionary(); - - Set referenceSequenceNames = new TreeSet(); - for( SAMSequenceRecord dictionaryEntry: referenceDictionary.getSequences() ) - referenceSequenceNames.add(dictionaryEntry.getSequenceName()); - - if( readsSequenceNames.size() == 0 ) { - logger.info("Reads file is unmapped. Skipping validation against reference."); - return; - } - - // If there's no overlap between reads and reference, data will be bogus. Throw an exception. - Set intersectingSequenceNames = new HashSet(readsSequenceNames); - intersectingSequenceNames.retainAll(referenceSequenceNames); - if( intersectingSequenceNames.size() == 0 ) { - StringBuilder error = new StringBuilder(); - error.append("No overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n"); - error.append(System.getProperty("line.separator")); - error.append(String.format("Reads contigs: %s%n", prettyPrintSequenceRecords(readsDictionary))); - error.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary))); - logger.error(error.toString()); - Utils.scareUser("No overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference."); - } - - // If the two datasets are not equal and neither is a strict subset of the other, warn the user. - if( !readsSequenceNames.equals(referenceSequenceNames) && - !readsSequenceNames.containsAll(referenceSequenceNames) && - !referenceSequenceNames.containsAll(readsSequenceNames)) { - StringBuilder warning = new StringBuilder(); - warning.append("Limited overlap exists between sequence dictionary of the reads and the sequence dictionary of the reference. Perhaps you're using the wrong reference?\n"); - warning.append(System.getProperty("line.separator")); - warning.append(String.format("Reads contigs: %s%n", prettyPrintSequenceRecords(readsDictionary))); - warning.append(String.format("Reference contigs: %s%n", prettyPrintSequenceRecords(referenceDictionary))); - logger.warn(warning.toString()); - } - } - - private String prettyPrintSequenceRecords( SAMSequenceDictionary sequenceDictionary ) { - String[] sequenceRecordNames = new String[ sequenceDictionary.size() ]; - int sequenceRecordIndex = 0; - for( SAMSequenceRecord sequenceRecord: sequenceDictionary.getSequences() ) - sequenceRecordNames[sequenceRecordIndex++] = sequenceRecord.getSequenceName(); - return Arrays.deepToString(sequenceRecordNames); - } } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index f9e18ee8b..97f6fb37a 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -1,10 +1,8 @@ package org.broadinstitute.sting.gatk.traversals; -import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -20,9 +18,6 @@ public abstract class TraversalEngine { // Maximum number of reads to process before finishing protected long maximumIterations = -1; - // the stored header - private SAMFileHeader myHeader = null; - /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); @@ -34,27 +29,6 @@ public abstract class TraversalEngine { this.maximumIterations = maximumIterations; } - /** - * get the associated SAM header for our run - * - * @return the header if it's stored, null if not - */ - public SAMFileHeader getSAMHeader() { - return myHeader; - } - - /** - * set's the SAM header for this traversal, which should - * be the merged header in the multiple BAM file case. - * - * @param myHeader the passed in header - */ - - public void setSAMHeader(SAMFileHeader myHeader) { - this.myHeader = myHeader; - } - - /** * @param curTime (current runtime, in millisecs) * diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 9de6de5ec..e27fe6f45 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -75,7 +75,7 @@ public class PrintReadsWalker extends ReadWalker { if ( platform != null ) { Object readGroupAttr = read.getAttribute("RG"); if ( readGroupAttr != null ) { - SAMReadGroupRecord readGroup = getToolkit().getEngine().getSAMHeader().getReadGroup(readGroupAttr.toString()); + SAMReadGroupRecord readGroup = getToolkit().getSAMFileHeader().getReadGroup(readGroupAttr.toString()); if ( readGroup != null ) { Object readPlatformAttr = readGroup.getAttribute("PL"); if ( readPlatformAttr != null ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java index 7ee79540e..91607e630 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/SplitSamFileWalker.java @@ -26,20 +26,11 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.*; -import org.broadinstitute.sting.gatk.walkers.WalkerName; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.recalibration.*; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; import org.apache.log4j.Logger; import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.io.File; -import java.io.FileNotFoundException; @WalkerName("SplitSamFile") @Requires({DataSource.READS}) @@ -74,10 +65,10 @@ public class SplitSamFileWalker extends ReadWalker reduceInit() { HashMap headers = new HashMap(); - for ( SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) { + for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) { final String sample = readGroup.getSample(); if ( ! headers.containsKey(sample) ) { - SAMFileHeader header = Utils.copySAMFileHeader(this.getToolkit().getEngine().getSAMHeader()); + SAMFileHeader header = Utils.copySAMFileHeader(this.getToolkit().getSAMFileHeader()); logger.debug(String.format("Creating BAM header for sample %s", sample)); ArrayList readGroups = new ArrayList(); header.setReadGroups(readGroups); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java index 2628aebf6..8b9ba01e4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IntervalCleanerWalker.java @@ -61,7 +61,6 @@ public class IntervalCleanerWalker extends LocusWindowWalker if ( MISMATCH_THRESHOLD <= 0.0 || MISMATCH_THRESHOLD > 1.0 ) throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1"); - SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); if ( writer != null ) { readsToWrite = new TreeSet(); } @@ -888,7 +887,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker String reference = "AAAAAACCCCCCAAAAAA"; // the alternate reference is: "AAAAAACCCTTCCCAAAAAA"; ArrayList reads = new ArrayList(); - SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); + SAMFileHeader header = getToolkit().getSAMFileHeader(); SAMRecord r1 = new SAMRecord(header); r1.setReadName("1"); r1.setReadString("AACCCCCC"); @@ -938,7 +937,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker String reference = "AAAAAACCCTTCCCAAAAAA"; // the alternate reference is: "AAAAAACCCCCCAAAAAA"; ArrayList reads = new ArrayList(); - SAMFileHeader header = getToolkit().getEngine().getSAMHeader(); + SAMFileHeader header = getToolkit().getSAMFileHeader(); SAMRecord r1 = new SAMRecord(header); r1.setReadName("1"); r1.setReadString("ACCCTTCC"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index d6cf705e3..eab6dd059 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -50,7 +50,7 @@ public class CovariateCounterWalker extends LocusWalker { */ public void initialize() { Set readGroups = new HashSet(); - for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) { + for (SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) { if( readGroup.getAttribute("PL") == null ) Utils.warnUser(String.format("PL attribute for read group %s is unset; assuming all reads are supported",readGroup.getReadGroupId())); if( !isSupportedReadGroup(readGroup) ) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageBySample.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageBySample.java index 3f0825d44..eff224ada 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageBySample.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageBySample.java @@ -25,7 +25,7 @@ public class CoverageBySample extends LocusWalker public void initialize() { GenomeAnalysisEngine toolkit = this.getToolkit(); - this.header = toolkit.getEngine().getSAMHeader(); + this.header = toolkit.getSAMFileHeader(); List read_groups = header.getReadGroups(); sample_names = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java index 5cfa8a973..b84d79c6e 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java @@ -53,7 +53,7 @@ public class IOCrusherWalker extends ReadWalker reduceInit() { ArrayList outputs = new ArrayList(nWaysOut); for ( int i = 0; i < nWaysOut; i++ ) { - SAMFileHeader header = this.getToolkit().getEngine().getSAMHeader(); + SAMFileHeader header = this.getToolkit().getSAMFileHeader(); outputs.add(Utils.createSAMFileWriterWithCompression(header, true, outputBase + "." + i + ".bam", BAMcompression)); } return outputs; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ListSampleIds.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ListSampleIds.java index b0c7b0dce..0e3a5e58a 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ListSampleIds.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ListSampleIds.java @@ -16,7 +16,7 @@ public class ListSampleIds extends LocusWalker public void initialize() { GenomeAnalysisEngine toolkit = this.getToolkit(); - SAMFileHeader header = toolkit.getEngine().getSAMHeader(); + SAMFileHeader header = toolkit.getSAMFileHeader(); List read_groups = header.getReadGroups(); for (int i = 0; i < read_groups.size(); i++) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java index e545375b8..e16419580 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MultiSampleCaller.java @@ -90,7 +90,7 @@ public class MultiSampleCaller extends LocusWalker read_groups = header.getReadGroups(); sample_names = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/PoolCallingExperiment.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/PoolCallingExperiment.java index 27d7590f6..d51a843be 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/PoolCallingExperiment.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/PoolCallingExperiment.java @@ -34,7 +34,7 @@ public class PoolCallingExperiment extends LocusWalker read_groups = header.getReadGroups(); sample_names = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index 1582bd9f3..e630815d7 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -259,7 +259,7 @@ public class SingleSampleGenotyper extends LocusWalker