diff --git a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index e0e1b3892..5ce4985dc 100644 --- a/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -11,10 +11,9 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto import java.io.File; import java.io.FileNotFoundException; -import java.util.List; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Arrays; +import java.util.*; + +import net.sf.picard.filter.SamRecordFilter; /* * Copyright (c) 2009 The Broad Institute @@ -77,16 +76,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram { protected Object executeGATK() { Walker mWalker = GATKEngine.getWalkerByName(getAnalysisName()); + Collection filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),mWalker); - // load the arguments into the walkers + // load the arguments into the walker / filters. loadArgumentsIntoObject(mWalker); + for(SamRecordFilter filter: filters) + loadArgumentsIntoObject(filter); // process any arguments that need a second pass GATKArgumentCollection arguments = getArgumentCollection(); processArguments(arguments); // set the analysis name in the argument collection - return GATKEngine.execute(arguments, mWalker); + return GATKEngine.execute(arguments, mWalker, filters); } /** @@ -118,7 +120,18 @@ public abstract class CommandLineExecutable extends CommandLineProgram { protected Class[] getArgumentSources() { // No walker info? No plugins. 
if (getAnalysisName() == null) return new Class[] {}; - return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() }; + + Collection argumentSources = new ArrayList(); + + Walker walker = GATKEngine.getWalkerByName(getAnalysisName()); + argumentSources.add(walker.getClass()); + + Collection filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),walker); + for(SamRecordFilter filter: filters) + argumentSources.add(filter.getClass()); + + Class[] argumentSourcesAsArray = new Class[argumentSources.size()]; + return argumentSources.toArray(argumentSourcesAsArray); } @Override diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 5d8297a4e..aef85d491 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -123,7 +123,7 @@ public class GenomeAnalysisEngine { * @param my_walker Walker to run over the dataset. Must not be null. * @return the value of this traversal. */ - public Object execute(GATKArgumentCollection args, Walker my_walker) { + public Object execute(GATKArgumentCollection args, Walker my_walker, Collection filters) { // validate our parameters if (args == null) { throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null."); @@ -137,7 +137,7 @@ public class GenomeAnalysisEngine { this.argCollection = args; // Prepare the data for traversal. 
- initializeDataSources(my_walker, argCollection); + initializeDataSources(my_walker, filters, argCollection); // our microscheduler, which is in charge of running everything MicroScheduler microScheduler = createMicroscheduler(my_walker); @@ -203,10 +203,26 @@ public class GenomeAnalysisEngine { return walkerManager.getName(walkerType); } - private void initializeDataSources(Walker my_walker, GATKArgumentCollection argCollection) { + /** + * Gets a list of the filters to associate with the given walker. Will NOT initialize the engine with these filters; + * the caller must handle that directly. + * @param walker Walker to use when determining which filters to apply. + * @return A collection of available filters. + */ + public Collection getFiltersForWalker(GATKArgumentCollection args, Walker walker) { + Set filters = new HashSet(); + filters.addAll(WalkerManager.getReadFilters(walker,filterManager)); + if (args.filterZeroMappingQualityReads != null && args.filterZeroMappingQualityReads) + filters.add(new ZeroMappingQualityReadFilter()); + for(String filterName: args.readFilters) + filters.add(filterManager.createByName(filterName)); + return Collections.unmodifiableSet(filters); + } + + private void initializeDataSources(Walker my_walker, Collection filters, GATKArgumentCollection argCollection) { validateSuppliedReadsAgainstWalker(my_walker, argCollection); logger.info("Strictness is " + argCollection.strictnessLevel); - readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, argCollection)); + readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, filters, argCollection)); validateSuppliedReferenceAgainstWalker(my_walker, argCollection); referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); @@ -390,15 +406,7 @@ public class GenomeAnalysisEngine { * @param argCollection The collection of arguments passed to the engine. * @return The reads object providing reads source info. 
*/ - private Reads extractSourceInfo(Walker walker, GATKArgumentCollection argCollection) { - List filters = new ArrayList(); - - filters.addAll(WalkerManager.getReadFilters(walker,filterManager)); - if (argCollection.filterZeroMappingQualityReads != null && argCollection.filterZeroMappingQualityReads) - filters.add(new ZeroMappingQualityReadFilter()); - for(String filterName: argCollection.readFilters) - filters.add(filterManager.createByName(filterName)); - + private Reads extractSourceInfo(Walker walker, Collection filters, GATKArgumentCollection argCollection) { return new Reads(argCollection.samFiles, argCollection.strictnessLevel, argCollection.downsampleFraction, diff --git a/java/src/org/broadinstitute/sting/gatk/Reads.java b/java/src/org/broadinstitute/sting/gatk/Reads.java index 3d8459743..31b42d77a 100755 --- a/java/src/org/broadinstitute/sting/gatk/Reads.java +++ b/java/src/org/broadinstitute/sting/gatk/Reads.java @@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileReader; import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.Collection; /** * User: hanna * Date: May 14, 2009 @@ -29,7 +30,7 @@ public class Reads { private Double downsamplingFraction = null; private Integer downsampleToCoverage = null; private Boolean beSafe = null; - private List supplementalFilters = null; + private Collection supplementalFilters = null; private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT private boolean includeReadsWithDeletionAtLoci = false; @@ -91,7 +92,7 @@ public class Reads { return beSafe; } - public List getSupplementalFilters() { + public Collection getSupplementalFilters() { return supplementalFilters; } @@ -120,7 +121,7 @@ public class Reads { Double downsampleFraction, Integer downsampleCoverage, Boolean beSafe, - List supplementalFilters, + Collection supplementalFilters, int maximumReadsAtLocus, boolean includeReadsWithDeletionAtLoci) { this.readsFiles = samFiles; 
diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 12d7c7a7e..58c1fd461 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import java.io.File; import java.util.List; +import java.util.Collection; /* * Copyright (c) 2009 The Broad Institute @@ -419,7 +420,7 @@ public class SAMDataSource implements SimpleDataSource { StingSAMIterator wrappedIterator, Double downsamplingFraction, Boolean beSafeP, - List supplementalFilters) { + Collection supplementalFilters) { // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away if (downsamplingFraction != null) diff --git a/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java new file mode 100644 index 000000000..598794d82 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/MaxReadLengthFilter.java @@ -0,0 +1,22 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.cmdLine.Argument; + +/** + * Filters out reads whose length is greater than some value. 
+ * + * @author mhanna + * @version 0.1 + */ +public class MaxReadLengthFilter implements SamRecordFilter { + @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true) + private int maxReadLength; + + public boolean filterOut(SAMRecord read) { + // check the length + return read.getReadLength() > maxReadLength; + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 25e1c50b1..8e625e9b8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -42,8 +42,6 @@ public class PrintReadsWalker extends ReadWalker { /** an optional argument to dump the reads out to a BAM file */ @Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false) SAMFileWriter outputBamFile = null; - @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false) - Integer maxLength = null; @Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false) String readGroup = null; @Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false) @@ -65,10 +63,6 @@ public class PrintReadsWalker extends ReadWalker { * @return true if the read passes the filter, false if it doesn't */ public boolean filter(char[] ref, SAMRecord read) { - // check the length - if ( maxLength != null && read.getReadLength() > maxLength ) - return false; - // check the read group if ( readGroup != null ) { SAMReadGroupRecord myReadGroup = read.getReadGroup();