diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 978251e10..1457a5ae8 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -27,20 +27,18 @@ package org.broadinstitute.sting.gatk; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFileFactory; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMFileReader.ValidationStringency; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.traversals.*; +import org.broadinstitute.sting.gatk.traversals.TraversalEngine; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.cmdLine.ArgumentException; +import java.io.File; import java.util.ArrayList; import java.util.List; -import java.io.*; public class GenomeAnalysisEngine { @@ -100,7 +98,7 @@ public class GenomeAnalysisEngine { if (argCollection.HAPMAPChipFile != null) bindConvenienceRods("hapmap-chip", "GFF", argCollection.HAPMAPChipFile); // TODO: The ROD iterator currently does not understand multiple intervals file. Fix this by cleaning the ROD system. - if ( argCollection.intervals != null && argCollection.intervals.size() == 1) { + if (argCollection.intervals != null && argCollection.intervals.size() == 1) { bindConvenienceRods("interval", "Intervals", argCollection.intervals.get(0).replaceAll(",", "")); } @@ -111,7 +109,7 @@ public class GenomeAnalysisEngine { validateInputsAgainstWalker(my_walker, argCollection, rods); // create the output streams - initializeOutputStreams( my_walker ); + initializeOutputStreams(my_walker); // our microscheduler, which is in charge of running everything MicroScheduler microScheduler = null; @@ -129,13 +127,10 @@ public class GenomeAnalysisEngine { // perform validation steps that are common to all the engines genericEngineSetup(); - // parse out any genomic location they've provided - //List locationsList = setupIntervalRegion(); - List locationsList = engine.getLocations(); GenomeLocSortedSet locs = null; - if (locationsList != null) - locs = GenomeLocSortedSet.createSetFromList(locationsList); - + if (argCollection.intervals != null) { + locs = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals)); + } // excute the microscheduler, storing the results walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations); } @@ -145,6 +140,7 @@ public class GenomeAnalysisEngine { * * @param my_walker our walker of type LocusWalker * @param rods the reference order data + * * @return a new microscheduler */ private MicroScheduler createMicroscheduler(Walker my_walker, List> rods) { @@ -156,38 +152,23 @@ public class GenomeAnalysisEngine { // create the MicroScheduler microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); engine = microScheduler.getTraversalEngine(); - } - else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) { + } else if (my_walker instanceof ReadWalker || my_walker instanceof DuplicateWalker) { if (argCollection.referenceFile == null) Utils.scareUser(String.format("Read-based traversals require a reference file but none was given")); microScheduler = MicroScheduler.create(my_walker, extractSourceInfoFromArguments(argCollection), argCollection.referenceFile, rods, argCollection.numberOfThreads); engine = microScheduler.getTraversalEngine(); } else { - Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type " + argCollection.analysisName)); + Utils.scareUser(String.format("Unable to create the appropriate TraversalEngine for analysis type " + argCollection.analysisName)); } return microScheduler; } - /** - * commands that get executed for each engine, regardless of the type - */ + /** commands that get executed for each engine, regardless of the type */ private void genericEngineSetup() { Reads sourceInfo = extractSourceInfoFromArguments(argCollection); - - engine.setStrictness(sourceInfo.getValidationStringency()); - engine.setMaxReads(argCollection.maximumEngineIterations); - engine.setFilterZeroMappingQualityReads(argCollection.filterZeroMappingQualityReads); - - // we default interval files over the genome region string - if (argCollection.intervals != null) { - engine.setLocation(parseIntervalRegion(argCollection.intervals)); - } - - engine.setReadFilters(sourceInfo); - engine.initialize(); } @@ -196,9 +177,9 @@ public class GenomeAnalysisEngine { * * @return a list of genomeLoc representing the interval file */ - public static List parseIntervalRegion(final List intervals ) { + public static List parseIntervalRegion(final List intervals) { List locs = new ArrayList(); - for( String interval: intervals ) { + for (String interval : intervals) { if (new File(interval).exists()) { locs.addAll(GenomeLocParser.intervalFileToList(interval)); } else { @@ -211,9 +192,12 @@ public class GenomeAnalysisEngine { /** * Bundles all the source information about the reads into a unified data structure. + * * @param argCollection The collection of arguments passed to the engine. + * * @return The reads object providing reads source info. */ + private Reads extractSourceInfoFromArguments( GATKArgumentCollection argCollection ) { return new Reads( argCollection.samFiles, argCollection.strictnessLevel, @@ -229,33 +213,33 @@ public class GenomeAnalysisEngine { String walkerName = WalkerManager.getWalkerName(walker.getClass()); // Check what the walker says is required against what was provided on the command line. - if( WalkerManager.isRequired(walker,DataSource.READS) && (arguments.samFiles == null || arguments.samFiles.size() == 0) ) - throw new ArgumentException(String.format("Walker %s requires reads but none were provided. If this is incorrect, alter the walker's @Requires annotation.",walkerName)); - if( WalkerManager.isRequired(walker,DataSource.REFERENCE) && arguments.referenceFile == null ) - throw new ArgumentException(String.format("Walker %s requires a reference but none was provided. If this is incorrect, alter the walker's @Requires annotation.",walkerName)); + if (WalkerManager.isRequired(walker, DataSource.READS) && (arguments.samFiles == null || arguments.samFiles.size() == 0)) + throw new ArgumentException(String.format("Walker %s requires reads but none were provided. If this is incorrect, alter the walker's @Requires annotation.", walkerName)); + if (WalkerManager.isRequired(walker, DataSource.REFERENCE) && arguments.referenceFile == null) + throw new ArgumentException(String.format("Walker %s requires a reference but none was provided. If this is incorrect, alter the walker's @Requires annotation.", walkerName)); // Check what the walker says is allowed against what was provided on the command line. - if( (arguments.samFiles != null && arguments.samFiles.size() > 0) && !WalkerManager.isAllowed(walker,DataSource.READS) ) - throw new ArgumentException(String.format("Walker %s does not allow reads but reads were provided. If this is incorrect, alter the walker's @Allows annotation",walkerName)); - if( arguments.referenceFile != null && !WalkerManager.isAllowed(walker,DataSource.REFERENCE) ) - throw new ArgumentException(String.format("Walker %s does not allow a reference but one was provided. If this is incorrect, alter the walker's @Allows annotation",walkerName)); + if ((arguments.samFiles != null && arguments.samFiles.size() > 0) && !WalkerManager.isAllowed(walker, DataSource.READS)) + throw new ArgumentException(String.format("Walker %s does not allow reads but reads were provided. If this is incorrect, alter the walker's @Allows annotation", walkerName)); + if (arguments.referenceFile != null && !WalkerManager.isAllowed(walker, DataSource.REFERENCE)) + throw new ArgumentException(String.format("Walker %s does not allow a reference but one was provided. If this is incorrect, alter the walker's @Allows annotation", walkerName)); // Check to make sure that all required metadata is present. List allRequired = WalkerManager.getRequiredMetaData(walker); - for( RMD required: allRequired ) { + for (RMD required : allRequired) { boolean found = false; - for( ReferenceOrderedData rod: rods ) { - if( rod.matches(required.name(),required.type()) ) + for (ReferenceOrderedData rod : rods) { + if (rod.matches(required.name(), required.type())) found = true; } - if( !found ) - throw new ArgumentException(String.format("Unable to find reference metadata (%s,%s)",required.name(),required.type())); + if (!found) + throw new ArgumentException(String.format("Unable to find reference metadata (%s,%s)", required.name(), required.type())); } // Check to see that no forbidden rods are present. - for( ReferenceOrderedData rod: rods ) { - if( !WalkerManager.isAllowed(walker,rod) ) - throw new ArgumentException(String.format("Walker does not allow access to metadata: %s. If this is correct, change the @Allows metadata",rod.getName())); + for (ReferenceOrderedData rod : rods) { + if (!WalkerManager.isAllowed(walker, rod)) + throw new ArgumentException(String.format("Walker does not allow access to metadata: %s. If this is correct, change the @Allows metadata", rod.getName())); } } @@ -266,7 +250,7 @@ public class GenomeAnalysisEngine { */ public int getBAMCompression() { return (argCollection.BAMcompression == null || - argCollection.BAMcompression < 1 || + argCollection.BAMcompression < 1 || argCollection.BAMcompression > 8) ? 5 : argCollection.BAMcompression; } @@ -284,7 +268,7 @@ public class GenomeAnalysisEngine { /** Initialize the output streams as specified by the user. */ - private void initializeOutputStreams( Walker walker ) { + private void initializeOutputStreams(Walker walker) { outputTracker = (argCollection.outErrFileName != null) ? new OutputTracker(argCollection.outErrFileName, argCollection.outErrFileName) : new OutputTracker(argCollection.outFileName, argCollection.errFileName); walker.initializeOutputStreams(outputTracker); @@ -299,17 +283,6 @@ public class GenomeAnalysisEngine { return outputTracker; } - /** - * This function is deprecated in the new traversal engines, if you need to get the header - * please get the engine and then use the getHeader function - * - * @return - */ - @Deprecated - public SAMFileReader getSamReader() { - return this.engine.getSamReader(); - } - public TraversalEngine getEngine() { return this.engine; } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index b67ecb87f..ce5507386 100755 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -101,13 +101,13 @@ public abstract class MicroScheduler { */ protected MicroScheduler(Walker walker, Reads reads, File refFile, List> rods) { if (walker instanceof ReadWalker) { - traversalEngine = new TraverseReads(reads.getReadsFiles(), refFile, rods); + traversalEngine = new TraverseReads(); } else if (walker instanceof LocusWalker) { - traversalEngine = new TraverseLoci(reads.getReadsFiles(), refFile, rods); + traversalEngine = new TraverseLoci(); } else if (walker instanceof LocusWindowWalker) { - traversalEngine = new TraverseLocusWindows(reads.getReadsFiles(), refFile, rods); + traversalEngine = new TraverseLocusWindows(); } else if (walker instanceof DuplicateWalker) { - traversalEngine = new TraverseDuplicates(reads.getReadsFiles(), refFile, rods); + traversalEngine = new TraverseDuplicates(); } else { throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type."); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SNPCallFromGenotypes.java b/java/src/org/broadinstitute/sting/gatk/refdata/SNPCallFromGenotypes.java index 6b9a190c7..5a5cacdf5 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SNPCallFromGenotypes.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SNPCallFromGenotypes.java @@ -1,19 +1,6 @@ package org.broadinstitute.sting.gatk.refdata; -import java.util.*; -import java.util.regex.MatchResult; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; - -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.xReadLines; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.apache.log4j.Logger; +import java.util.List; /** * loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java index 6bc3a5481..bedd7fcc4 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java @@ -1,218 +1,63 @@ package org.broadinstitute.sting.gatk.traversals; -import net.sf.picard.filter.SamRecordFilter; -import net.sf.picard.filter.FilteringIterator; -import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMFileReader; -import net.sf.samtools.SAMFileReader.ValidationStringency; -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.iterators.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.Reads; -import org.broadinstitute.sting.utils.*; - -import java.io.File; -import java.util.*; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.GenomeLoc; public abstract class TraversalEngine { - // list of reference ordered data objects - protected List> rods = null; - - // Iterator over rods - List>> rodIters; - - // How strict should we be with SAM/BAM parsing? - protected ValidationStringency strictness = ValidationStringency.STRICT; - // Time in milliseconds since we initialized this engine - protected long startTime = -1; - protected long lastProgressPrintTime = -1; // When was the last time we printed our progress? + private long startTime = -1; + private long lastProgressPrintTime = -1; // When was the last time we printed our progress? // How long can we go without printing some progress info? - protected long MAX_PROGRESS_PRINT_TIME = 30 * 1000; // 10 seconds in millisecs - protected long N_RECORDS_TO_PRINT = 1000000; + private final long MAX_PROGRESS_PRINT_TIME = 30 * 1000; // 10 seconds in millisecs + private final long N_RECORDS_TO_PRINT = 1000000; // Maximum number of reads to process before finishing protected long maxReads = -1; - // Name of the reads file, in BAM/SAM format - protected List readsFiles = null; // the name of the reads file - protected SAMFileReader samReader = null; - // iterator over the sam records in the readsFile - protected Iterator samReadIter = null; - - // The verifying iterator, it does checking - protected VerifyingSamIterator verifyingSamReadIter = null; - - // The reference data -- filename, refSeqFile, and iterator - protected File refFileName = null; // the name of the reference file - - // Progress tracker for the sam file - protected FileProgressTracker samReadingTracker = null; - - protected boolean DEBUGGING = false; - protected boolean beSafeP = true; - protected boolean SORT_ON_FLY = false; - protected boolean DOWNSAMPLE_BY_FRACTION = false; - protected boolean DOWNSAMPLE_BY_COVERAGE = false; - protected boolean FILTER_UNSORTED_READS = false; - protected boolean walkOverAllSites = false; - protected int maxOnFlySorts = 100000; - protected double downsamplingFraction = 1.0; - protected int downsamplingCoverage = 0; - protected boolean filterZeroMappingQualityReads = false; - - // the stored header - protected SAMFileHeader myHeader = null; + private SAMFileHeader myHeader = null; - /** - * our log, which we want to capture anything from this class - */ + /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - - // Locations we are going to process during the traversal - private List locs = null; - - // -------------------------------------------------------------------------------------------------------------- - // - // Setting up the engine - // - // -------------------------------------------------------------------------------------------------------------- - - /** - * Creates a new, uninitialized TraversalEngine - * - * @param reads SAM/BAM file of reads - * @param ref Reference file in FASTA format, assumes a .dict file is also available - * @param rods Array of reference ordered data sets - */ - public TraversalEngine(List reads, File ref, List> rods) { - readsFiles = reads; - refFileName = ref; - this.rods = rods; - } - // -------------------------------------------------------------------------------------------------------------- // // Manipulating the underlying engine parameters // // -------------------------------------------------------------------------------------------------------------- - //public void setRegion(final String reg) { regionStr = regionStr; } - //public void setTraversalType(final String type) { traversalType = type; } - public void setStrictness(final ValidationStringency s) { - strictness = s; - } - public void setMaxReads(final int maxReads) { this.maxReads = maxReads; } - public void setFilterZeroMappingQualityReads(final boolean filterZeroMappingQualityReads) { - this.filterZeroMappingQualityReads = filterZeroMappingQualityReads; - } - - private void setDebugging(final boolean d) { - DEBUGGING = d; - } - - private void setSafetyChecking(final boolean beSafeP) { - if (!beSafeP) - logger.warn("*** Turning off safety checking, I hope you know what you are doing. Errors will result in debugging assert failures and other inscrutable messages..."); - this.beSafeP = beSafeP; - } - - public void setFilterUnsortedReads(final boolean filterUnsorted) { - if (!filterUnsorted) - logger.warn("*** Turning on filtering of out of order reads, I *really* hope you know what you are doing, as you are removing data..."); - this.FILTER_UNSORTED_READS = filterUnsorted; - } - - private void setSortOnFly(final int maxReadsToSort) { - logger.info("Sorting read file on the fly: max reads allowed is " + maxReadsToSort); - SORT_ON_FLY = true; - maxOnFlySorts = maxReadsToSort; - } - - private void setSortOnFly() { setSortOnFly(100000); } - - private void setDownsampleByFraction(final double fraction) { - logger.info("Downsampling to approximately " + (fraction * 100.0) + "% of filtered reads"); - DOWNSAMPLE_BY_FRACTION = true; - downsamplingFraction = fraction; - } - - private void setDownsampleByCoverage(final int coverage) { - logger.info("Downsampling to coverage " + coverage); - DOWNSAMPLE_BY_COVERAGE = true; - downsamplingCoverage = coverage; - } - - /** - * Sets up read filtering performed by the traversal engine. - * @param reads Read filters to instantiate as the traversal engine walks over the data. - * @deprecated Should only be used by old-style traversals. - */ - @Deprecated - public void setReadFilters( Reads reads) { - if( reads.getDownsamplingFraction() != null ) setDownsampleByFraction(reads.getDownsamplingFraction()); - if( reads.getDownsampleToCoverage() != null ) setDownsampleByCoverage(reads.getDownsampleToCoverage()); - if( reads.getSafetyChecking() != null ) setSafetyChecking(reads.getSafetyChecking()); - } - // -------------------------------------------------------------------------------------------------------------- // // functions for dealing locations (areas of the genome we're traversing over) // // -------------------------------------------------------------------------------------------------------------- - - /** * get the associated SAM header for our run + * * @return the header if it's stored, null if not */ public SAMFileHeader getSAMHeader() { - if ( myHeader == null ) - myHeader = samReader.getFileHeader(); return myHeader; } /** * set's the SAM header for this traversal, which should * be the merged header in the multiple BAM file case. - * + * * @param myHeader the passed in header */ public void setSAMHeader(SAMFileHeader myHeader) { this.myHeader = myHeader; } - - /** - * Sets the intervals over which the traversal(s) should happen. - * @param locs - */ - public void setLocation(final List locs) { - this.locs = locs; - } - - - public boolean hasLocations() { - return this.locs != null && !this.locs.isEmpty(); - } - - public List getLocations() { - return this.locs; - } - // -------------------------------------------------------------------------------------------------------------- // // printing @@ -221,17 +66,18 @@ public abstract class TraversalEngine { /** * @param curTime (current runtime, in millisecs) + * * @return true if the maximum interval (in millisecs) has passed since the last printing */ - protected boolean maxElapsedIntervalForPrinting(final long curTime) { + private boolean maxElapsedIntervalForPrinting(final long curTime) { return (curTime - this.lastProgressPrintTime) > MAX_PROGRESS_PRINT_TIME; } /** * Forward request to printProgress * - * @param type - * @param loc + * @param type the type of traversal + * @param loc the location */ public void printProgress(final String type, GenomeLoc loc) { printProgress(false, type, loc); @@ -245,7 +91,7 @@ public abstract class TraversalEngine { * @param type String to print out describing our atomic traversal type ("read", "locus", etc) * @param loc Current location */ - public void printProgress(boolean mustPrint, final String type, GenomeLoc loc) { + private void printProgress(boolean mustPrint, final String type, GenomeLoc loc) { final long nRecords = TraversalStatistics.nRecords; final long curTime = System.currentTimeMillis(); final double elapsed = (curTime - startTime) / 1000.0; @@ -259,22 +105,16 @@ public abstract class TraversalEngine { else logger.info(String.format("[PROGRESS] Traversed %,d %s in %.2f secs (%.2f secs per 1M %s)", nRecords, type, elapsed, secsPer1MReads, type)); - // Currently samReadingTracker will print misleading info if we're not processing the whole file - - // If an index is enabled, file read progress is meaningless because a linear - // traversal is not being performed. For now, don't bother printing progress. - // TODO: Create a sam indexed read tracker that tracks based on percentage through the query. - if (samReadingTracker != null && this.locs == null) - logger.info(String.format("[PROGRESS] -> %s", samReadingTracker.progressMeter())); } } /** * A passthrough method so that subclasses can report which types of traversals they're using. + * * @param sum Result of the computation. * @param Type of the computation. */ - public abstract void printOnTraversalDone( T sum ); + public abstract void printOnTraversalDone(T sum); /** * Called after a traversal to print out information about the traversal process @@ -290,9 +130,9 @@ public abstract class TraversalEngine { final double elapsed = (curTime - startTime) / 1000.0; logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours%n", elapsed, elapsed / 60, elapsed / 3600)); logger.info(String.format("Traversal skipped %d valid reads out of %d total (%.2f%%)", - TraversalStatistics.nSkippedReads, - TraversalStatistics.nReads, - (TraversalStatistics.nSkippedReads * 100.0) / TraversalStatistics.nReads)); + TraversalStatistics.nSkippedReads, + TraversalStatistics.nReads, + (TraversalStatistics.nSkippedReads * 100.0) / TraversalStatistics.nReads)); logger.info(String.format(" -> %d unmapped reads", TraversalStatistics.nUnmappedReads)); logger.info(String.format(" -> %d duplicate reads", TraversalStatistics.nDuplicates)); logger.info(String.format(" -> %d non-primary reads", TraversalStatistics.nNotPrimary)); @@ -306,147 +146,26 @@ public abstract class TraversalEngine { // // -------------------------------------------------------------------------------------------------------------- - /** - * Initialize the traversal engine. After this point traversals can be run over the data - * - * @return true on success - */ - public boolean initialize() { + /** Initialize the traversal engine. After this point traversals can be run over the data */ + public void initialize() { lastProgressPrintTime = startTime = System.currentTimeMillis(); // Initial the reference ordered data iterators - initializeRODs(); - - return true; - } - - protected SAMFileReader initializeSAMFile(File samFile) { - // todo: fixme, this is a hack to try out dynamic merging - if ( samFile.toString().endsWith(".list") ) { - return null; - // todo: omg, this is just scary, just it's just for testing purposes. fix with the new DataSource system - } else { - SAMFileReader samReader = new SAMFileReader(samFile, true); - samReader.setValidationStringency(strictness); - - final SAMFileHeader header = samReader.getFileHeader(); - logger.debug(String.format("Sort order is: " + header.getSortOrder())); - - // Kludge filename into sam file header. - if (samReader.getFileHeader().getReadGroups().size() < 1) { - //logger.warn("Setting header in reader " + f.getName()); - SAMReadGroupRecord rec = new SAMReadGroupRecord(samFile.getName()); - rec.setLibrary(samFile.getName()); - rec.setSample(samFile.getName()); - - samReader.getFileHeader().addReadGroup(rec); - } - - return samReader; - } } /** - * Prepare the list of reference ordered data iterators for each of the rods + * this method must be implemented by all traversal engines * - * @return A list of ROD iterators for getting data from each ROD - */ - protected void initializeRODs() { - // set up reference ordered data - rodIters = new ArrayList>>(); - for (ReferenceOrderedData data : rods) { - rodIters.add(new Pair>(data.getName(), data.iterator())); - } - } - - // -------------------------------------------------------------------------------------------------------------- - // - // shutdown - // - // -------------------------------------------------------------------------------------------------------------- - - public boolean shutdown() { - // todo: actually shutdown the resources - - return true; - } - - - // -------------------------------------------------------------------------------------------------------------- - // - // dealing with reference ordered data - // - // -------------------------------------------------------------------------------------------------------------- - - /** - * Builds a list of the reference ordered datum at loc from each of the iterators. This function - * assumes you are accessing the data in order. You can't use this function for random access. Each - * successive call moves you along the file, consuming all data before loc. + * @param walker the walker to run with + * @param shard a shard of data + * @param dataProvider the data provider that generates data given the shard + * @param sum the accumulator + * @param an object of the map type + * @param an object of the reduce type * - * @param loc The location to get the rods at - * @return A list of ReferenceOrderDatum at loc. ROD without a datum at loc will be null in the list + * @return an object of the reduce type */ - protected RefMetaDataTracker getReferenceOrderedDataAtLocus(final GenomeLoc loc) { - RefMetaDataTracker tracks = new RefMetaDataTracker(); - for (Pair> pair : rodIters) { - String name = pair.getFirst(); - tracks.bind(name, pair.getSecond().seekForward(loc)); - } - - return tracks; - } - - // -------------------------------------------------------------------------------------------------------------- - // - // processing - // - // -------------------------------------------------------------------------------------------------------------- - - public T traverse(Walker walker) { - T sum = null; - if ( hasLocations() && walker.isReduceByInterval() ) { - logger.info("Doing reduce by interval processing"); - if ( ! hasLocations() ) - Utils.scareUser("ReduceByInterval requested by no interval was provided"); - List> map = new ArrayList>(locs.size()); - for ( GenomeLoc loc : locs ) { - ArrayList l = new ArrayList(); - l.add(loc); - T intervalSum = traverse(walker, l); - sum = intervalSum; - map.add(new Pair(loc, intervalSum)); - } - walker.onTraversalDone(map); - } else { - List l = new ArrayList(); - if ( hasLocations() ) - l = locs; - sum = traverse(walker, l); - } - - printOnTraversalDone("elements", sum); - return sum; - } - - public T traverse(Walker walker, List locations) { - return null; - } - - public T traverse( Walker walker, - Shard shard, - ShardDataProvider dataProvider, - T sum ) { - throw new UnsupportedOperationException( "This method is a required override for new traversal engines. Please port your traversal engine to the new style." ); - } - - public void verifySortOrder(final boolean requiresSortedOrder) { - if (beSafeP && !SORT_ON_FLY && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { - final String msg = "SAM file is not sorted in coordinate order (according to header) Walker type with given arguments requires a sorted file for correct processing"; - if (requiresSortedOrder || strictness == SAMFileReader.ValidationStringency.STRICT) - throw new RuntimeIOException(msg); - else if (strictness == SAMFileReader.ValidationStringency.LENIENT) - logger.warn(msg); - } - } - @Deprecated - public SAMFileReader getSamReader() { return this.samReader; } + public abstract T traverse(Walker walker, + Shard shard, + ShardDataProvider dataProvider, + T sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 0f1bd9ce1..03711ba92 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -35,15 +35,12 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Pair; -import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -71,17 +68,6 @@ public class TraverseDuplicates extends TraversalEngine { private final boolean DEBUG = false; - /** - * Creates a new, uninitialized TraversalEngine - * - * @param reads SAM/BAM file of reads - * @param ref Reference file in FASTA format, assumes a .dict file is also available - * @param rods Array of reference ordered data sets - */ - public TraverseDuplicates(List reads, File ref, List> rods) { - super(reads, ref, rods); - } - private List readsAtLoc(final SAMRecord read, PushbackIterator iter) { GenomeLoc site = GenomeLocParser.createGenomeLoc(read); ArrayList l = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 79abd003e..70f8d2cae 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -1,27 +1,18 @@ package org.broadinstitute.sting.gatk.traversals; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.AllLocusView; -import org.broadinstitute.sting.gatk.datasources.providers.CoveredLocusView; -import org.broadinstitute.sting.gatk.datasources.providers.LocusView; -import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView; -import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; -import org.apache.log4j.Logger; -import java.util.List; import java.util.ArrayList; -import java.io.File; /** * A simple solution to iterating over all reference positions over a series of genomic locations. @@ -33,10 +24,6 @@ public class TraverseLoci extends TraversalEngine { */ protected static Logger logger = Logger.getLogger(TraversalEngine.class); - public TraverseLoci(List reads, File ref, List> rods) { - super( reads, ref, rods ); - } - public T traverse(Walker walker, ArrayList locations) { if ( locations.isEmpty() ) Utils.scareUser("Requested all locations be processed without providing locations to be processed!"); diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java index 1f094e9c2..20dda7636 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLocusWindows.java @@ -1,20 +1,20 @@ package org.broadinstitute.sting.gatk.traversals; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.LocusContext; +import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; +import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.shards.Shard; +import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.util.*; -import java.io.File; - -import net.sf.samtools.SAMRecord; +import java.util.ArrayList; /** * Created by IntelliJ IDEA. @@ -25,10 +25,6 @@ import net.sf.samtools.SAMRecord; */ public class TraverseLocusWindows extends TraversalEngine { - public TraverseLocusWindows(List reads, File ref, List> rods) { - super(reads, ref, rods); - } - public T traverse( Walker walker, Shard shard, ShardDataProvider dataProvider, @@ -70,7 +66,7 @@ public class TraverseLocusWindows extends TraversalEngine { return sum; } - private LocusContext getLocusContext(Iterator readIter, GenomeLoc interval) { + private LocusContext getLocusContext(StingSAMIterator readIter, GenomeLoc interval) { ArrayList reads = new ArrayList(); boolean done = false; long leftmostIndex = interval.getStart(), @@ -92,8 +88,8 @@ public class TraverseLocusWindows extends TraversalEngine { GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex); LocusContext locus = new LocusContext(window, reads, null); - if ( DOWNSAMPLE_BY_COVERAGE ) - locus.downsampleToCoverage(downsamplingCoverage); + if ( readIter.getSourceInfo().getDownsampleToCoverage() != null ) + locus.downsampleToCoverage(readIter.getSourceInfo().getDownsampleToCoverage()); return locus; } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index b74ea7223..0deb2a84c 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -4,24 +4,20 @@ import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; -import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; +import org.broadinstitute.sting.gatk.datasources.providers.ReadView; +import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.datasources.shards.IntervalShard; import org.broadinstitute.sting.gatk.datasources.shards.ReadShard; import org.broadinstitute.sting.gatk.datasources.shards.Shard; -import org.broadinstitute.sting.gatk.datasources.shards.IntervalShard; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; -import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import java.io.File; import java.util.ArrayList; import java.util.Arrays; -import java.util.List; /* * Copyright (c) 2009 The Broad Institute @@ -63,22 +59,9 @@ public class TraverseReads extends TraversalEngine { /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(TraverseReads.class); - - /** - * Creates a new, uninitialized TraversalEngine - * - * @param reads SAM/BAM file of reads - * @param ref Reference file in FASTA format, assumes a .dict file is also available - * @param rods Array of reference ordered data sets - */ - public TraverseReads(List reads, File ref, List> rods) { - super(reads, ref, rods); - } - - /** * Traverse by reads, given the data and the walker - * + * * @param walker the walker to traverse with * @param shard the shard, specifying the range of data to iterate over * @param dataProvider the provider of the reads data @@ -107,7 +90,7 @@ public class TraverseReads extends TraversalEngine { ReadWalker readWalker = (ReadWalker) walker; boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES); - + ReadView reads = new ReadView(dataProvider); ReadReferenceView reference = new ReadReferenceView(dataProvider); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelInspector.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelInspector.java deleted file mode 100755 index 67bdda7cf..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelInspector.java +++ /dev/null @@ -1,249 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; - -import org.broadinstitute.sting.gatk.LocusContext; -import org.broadinstitute.sting.gatk.walkers.ReadWalker; -import org.broadinstitute.sting.gatk.walkers.WalkerName; -import org.broadinstitute.sting.utils.cmdLine.Argument; -import org.broadinstitute.sting.playground.indels.*; -import net.sf.samtools.*; - -import net.sf.picard.reference.ReferenceSequence; - -// Investigates indels called in the alignment data - -@WalkerName("InspectIndels") -public class IndelInspector extends ReadWalker { - @Argument(fullName="IndelVerbosity", shortName="iverb", required=false, doc="Verbosity level: SILENT, PILESUMMARY, ALIGNMENTS") public String VERBOSITY_LEVEL = "SILENT"; - @Argument(fullName="OutputNormal", shortName="outNorm", required=true, doc="Output file (sam or bam) for non-indel related reads and indel reads that were not improved") public String OUT1; - @Argument(fullName="OutputIndel", shortName="outIndel", required=true, doc="Output file (sam or bam) for improved (realigned) indel related reads") public String OUT2; - @Argument(fullName="ControlRun", shortName="paranoid", required=false, doc="If true, all reads that would be otherwise picked and processed by this tool will be saved, unmodified, into the OutputNormal file") public boolean CONTROL_RUN = false; - @Argument(fullName="ErrorCheckMode", shortName="error", required=false, doc="Error counting mode: MM (mismatches only (from sam tags)), MC (mismatches only doing actual mismatch count on the fly (use this if tags are incorrectly set)), ERR (errors (arachne style: mm+gap lengths)), MG (count mismatches and gaps as one error each)") public String ERR_MODE; - @Argument(fullName="MaxErrors", shortName="maxError", required=false, doc="Maximum number of errors allowed (see ERR_MODE)") public Integer MAX_ERRS = 10000; - @Argument(fullName="MaxReadLength", shortName="maxRead", required=false, doc="Ignore reads that are longer than the specified cutoff (not a good way to do things but might be necessary because of performance issues)") public int MAX_READ_LENGTH = -1; - - private int discarded_cigar_count = 0; - private int discarded_long_read_count = 0; - private int discarded_maxerr_count = 0; - private int unmapped_read_count = 0; - private int total_reads = 0; - private IndelRecordPileCollector collector; - private PileBuilder pileBuilder = null; - private PassThroughWriter ptWriter; - private String cur_contig = null; - - - // public boolean filter(LocusContext context, SAMRecord read) { - // // we only want aligned reads - // return !read.getReadUnmappedFlag(); - // } - - public void initialize() { - - if ( ERR_MODE != null && ! ERR_MODE.equals("MM") && ! ERR_MODE.equals("MG") && ! ERR_MODE.equals("ERR") && ! ERR_MODE.equals("MC") ) { - err.println("Unknown value specified for ERR_MODE: "+ERR_MODE+ "... skipping it."); - ERR_MODE = null; - } - - SAMFileHeader header = getToolkit().getSamReader().getFileHeader(); - header.setSortOrder(SAMFileHeader.SortOrder.unsorted); - ptWriter = new PassThroughWriter(OUT1, header); - if ( ! CONTROL_RUN ) pileBuilder = new PileBuilder(OUT2, header, ptWriter); - - try { - if ( CONTROL_RUN ) collector = new IndelRecordPileCollector(ptWriter, new DiscardingPileReceiver() ); - else collector = new IndelRecordPileCollector(ptWriter, pileBuilder ); - } catch(Exception e) { - err.println(e.getMessage()); - } - if ( collector == null ) - return; - - collector.setControlRun(CONTROL_RUN); - - if ( ! CONTROL_RUN ) { - if ( VERBOSITY_LEVEL.toUpperCase().equals("SILENT")) pileBuilder.setVerbosity(PileBuilder.SILENT); - else if ( VERBOSITY_LEVEL.toUpperCase().equals("PILESUMMARY") ) pileBuilder.setVerbosity(PileBuilder.PILESUMMARY); - else if ( VERBOSITY_LEVEL.toUpperCase().equals("ALIGNMENTS") ) pileBuilder.setVerbosity(PileBuilder.ALIGNMENTS); - else { - err.println("Unrecognized VERBOSITY_LEVEL setting... skipping it."); - pileBuilder.setVerbosity(PileBuilder.SILENT); - } - } - } - - public Integer map(char[] ref, SAMRecord read) { - - total_reads++; - - if ( read.getReadUnmappedFlag() ) { - unmapped_read_count++; - ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data! - return 0; - } - - /* 11 May 2009 - Commented out because we no longer have the full reference sequence for sanity checking. - ReferenceSequence contig_seq = context.getReferenceContig(); - if ( read.getReferenceName() != cur_contig) { - cur_contig = read.getReferenceName(); - out.println("Contig "+cur_contig); - String refstr = new String(contig_seq.getBases()); - if ( !CONTROL_RUN ) - pileBuilder.setReferenceSequence(refstr); - out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString()); - } - */ - - // if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now - - if ( MAX_READ_LENGTH > 0 && read.getReadLength() > MAX_READ_LENGTH ) { - ptWriter.receive(read); - discarded_long_read_count++; - return 0; - } - - // we currently do not know how to deal with cigars containing elements other than M,I,D, so - // let's just skip the reads that contain those other elements (clipped reads?) - Cigar c = read.getCigar(); - boolean cigar_acceptable = true; - for ( int z = 0 ; z < c.numCigarElements() ; z++ ) { - CigarElement ce = c.getCigarElement(z); - switch ( ce.getOperator() ) { - case M: - case I: - case D: break; - default: - cigar_acceptable = false; - } - } - if ( ! cigar_acceptable ) { - ptWriter.receive(read); - discarded_cigar_count++; - return 0; - } - - int err = -1; -/* - System.out.println("MM: "+numMismatches(r)); - System.out.println("direct: "+numMismatchesDirect(r,contig_seq)); - System.out.print(" "); - for ( int i = read.getAlignmentStart() - 1 ; i < read.getAlignmentEnd() ; i++ ) System.out.print((char)contig_seq.getBases()[i]); - System.out.println(); - System.out.println((read.getReadNegativeStrandFlag()?"<-":"->")+read.getReadString()); - System.out.println("cigar: "+read.getCigarString()); - System.out.println(); - if (counter++ == 20 ) break; - continue; -*/ - - if ( ERR_MODE != null ) { - if ( ERR_MODE.equals("MM")) err = numMismatches(read,ref); - else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,ref); - else if ( ERR_MODE.equals("ERR")) err = numErrors(read,ref); - else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,ref); - if ( err > MAX_ERRS ) { - ptWriter.receive(read); - discarded_maxerr_count++; - return 0; - } - } - - collector.receive(read); - return 1; - } - - /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD - * - * @param r SAM record that must specify an alignment - * @return number of errors (number of mismatches plus total length of all insertions/deletions - * @throws RuntimeException - */ - private static int numErrors(SAMRecord r, char[] ref) throws RuntimeException { - - // NM currently stores the total number of mismatches in all blocks + 1 - int errs = numMismatches(r,ref); - - // now we have to add the total length of all indels: - Cigar c = r.getCigar(); - for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { - CigarElement ce = c.getCigarElement(i); - switch( ce.getOperator()) { - case M : break; // we already have correct number of mismatches - case I : - case D : - errs += ce.getLength(); - break; - default: throw new RuntimeException("Unrecognized cigar element"); - } - } - return errs; - } - - /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD - * - * @param r SAM record that must specify an alignment - * @return number of errors (number of mismatches plus total number of all insertions/deletions (each insertion or - * deletion will be counted as a single error regardless of the length) - * @throws RuntimeException - */ - private static int numMismatchesGaps(SAMRecord r,char[] ref) throws RuntimeException { - - // NM currently stores the total number of mismatches in all blocks + 1 - int errs = numMismatches(r,ref); - - // now we have to add the total length of all indels: - Cigar c = r.getCigar(); - for ( int i = 0 ; i < c.numCigarElements() ; i++ ) { - CigarElement ce = c.getCigarElement(i); - switch( ce.getOperator()) { - case M : break; // we already have correct number of mismatches - case I : - case D : - errs++; - break; - default: throw new RuntimeException("Unrecognized cigar element"); - } - } - return errs; - } - - /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */ - private static int numMismatches(SAMRecord r, char[] refseq) throws RuntimeException { - - // NM currently stores the total number of mismatches in all blocks + 1 - Integer i = (Integer)r.getAttribute("NM"); - if ( i == null ) return AlignmentUtils.numMismatches(r,refseq); - return ((Integer)r.getAttribute("NM")).intValue() - 1; - - } - - // Given result of map function - public Integer reduceInit() { return 0; } - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - public void onTraversalDone(Integer result) { - collector.close(); // write the reads collector might be still holding - - if ( ! CONTROL_RUN ) { - pileBuilder.printStats(); - pileBuilder.close(); - } - out.println("done."); - out.println("Total reads processed: "+ total_reads); - out.println("Unmodified: "+ ptWriter.getNumReadsReceived()); - if ( pileBuilder != null) out.println("Written into realigned piles: "+ pileBuilder.getNumReadsWritten()); - if ( pileBuilder != null) out.println("Received for realignment: "+ pileBuilder.getNumReadsReceived()); - out.println("Discarded reads with non-M,I,D cigar elements: "+ discarded_cigar_count); - out.println("Discarded long reads (above "+MAX_READ_LENGTH+" bp): "+ discarded_long_read_count); - out.println("Discarded reads with error counts exceeding the threshold: "+ discarded_maxerr_count); - out.println("Unmapped reads (passed through to the output): "+ unmapped_read_count); - out.println(); - collector.printLengthHistograms(); - ptWriter.close(); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index 0622a50f8..87ab360af 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -56,7 +56,6 @@ public class ArtificialReadsTraversal extends TraversalEngine { /** Creates a new, uninitialized ArtificialReadsTraversal */ public ArtificialReadsTraversal() { - super(null, null, null); } // what read ordering are we using diff --git a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java index c96395bbc..a09e7b1d7 100755 --- a/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java +++ b/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsTest.java @@ -7,8 +7,6 @@ import org.broadinstitute.sting.gatk.datasources.shards.Shard; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy; import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.CountReadsWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -98,9 +96,8 @@ public class TraverseReadsTest extends BaseTest { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. fail("Couldn't set the walkers printstream"); } - List> rods = new ArrayList>(); - - traversalEngine = new TraverseReads(bamList, refFile, rods); + + traversalEngine = new TraverseReads(); }