Command-line arguments for SamReadFilters.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2014 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-11-10 23:36:17 +00:00
parent a13cbe1df0
commit 2e4782f202
6 changed files with 69 additions and 30 deletions

View File

@ -11,10 +11,9 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto
import java.io.File;
import java.io.FileNotFoundException;
import java.util.List;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Arrays;
import java.util.*;
import net.sf.picard.filter.SamRecordFilter;
/*
* Copyright (c) 2009 The Broad Institute
@ -77,16 +76,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
protected Object executeGATK() {
Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName());
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),mWalker);
// load the arguments into the walkers
// load the arguments into the walker / filters.
loadArgumentsIntoObject(mWalker);
for(SamRecordFilter filter: filters)
loadArgumentsIntoObject(filter);
// process any arguments that need a second pass
GATKArgumentCollection arguments = getArgumentCollection();
processArguments(arguments);
// run the engine with the processed arguments and the walker
return GATKEngine.execute(arguments, mWalker);
return GATKEngine.execute(arguments, mWalker, filters);
}
/**
@ -118,7 +120,18 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
protected Class[] getArgumentSources() {
// No walker info? No plugins.
if (getAnalysisName() == null) return new Class[] {};
return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() };
Collection<Class> argumentSources = new ArrayList<Class>();
Walker walker = GATKEngine.getWalkerByName(getAnalysisName());
argumentSources.add(walker.getClass());
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),walker);
for(SamRecordFilter filter: filters)
argumentSources.add(filter.getClass());
Class[] argumentSourcesAsArray = new Class[argumentSources.size()];
return argumentSources.toArray(argumentSourcesAsArray);
}
@Override

View File

@ -123,7 +123,7 @@ public class GenomeAnalysisEngine {
* @param my_walker Walker to run over the dataset. Must not be null.
* @return the value of this traversal.
*/
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker) {
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker, Collection<SamRecordFilter> filters) {
// validate our parameters
if (args == null) {
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
@ -137,7 +137,7 @@ public class GenomeAnalysisEngine {
this.argCollection = args;
// Prepare the data for traversal.
initializeDataSources(my_walker, argCollection);
initializeDataSources(my_walker, filters, argCollection);
// our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = createMicroscheduler(my_walker);
@ -203,10 +203,26 @@ public class GenomeAnalysisEngine {
return walkerManager.getName(walkerType);
}
private void initializeDataSources(Walker my_walker, GATKArgumentCollection argCollection) {
/**
 * Assembles the read filters to associate with the given walker. This method only builds and
 * returns the collection; it does NOT initialize the engine with these filters, so the caller
 * must pass them on itself.
 *
 * @param args   Engine arguments; {@code filterZeroMappingQualityReads} and {@code readFilters} are consulted.
 * @param walker Walker to use when determining which filters to apply.
 * @return An unmodifiable collection of the filters gathered for this walker.
 */
public Collection<SamRecordFilter> getFiltersForWalker(GATKArgumentCollection args, Walker walker) {
    Set<SamRecordFilter> readFilters = new HashSet<SamRecordFilter>();

    // Start with whatever WalkerManager reports for this walker.
    readFilters.addAll(WalkerManager.getReadFilters(walker, filterManager));

    // The zero-mapping-quality flag may be unset (null); only an explicit true adds the filter.
    if (Boolean.TRUE.equals(args.filterZeroMappingQualityReads)) {
        readFilters.add(new ZeroMappingQualityReadFilter());
    }

    // Finally, instantiate every filter that was requested by name.
    for (String filterName : args.readFilters) {
        readFilters.add(filterManager.createByName(filterName));
    }

    return Collections.unmodifiableSet(readFilters);
}
private void initializeDataSources(Walker my_walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
validateSuppliedReadsAgainstWalker(my_walker, argCollection);
logger.info("Strictness is " + argCollection.strictnessLevel);
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, argCollection));
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, filters, argCollection));
validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
@ -390,15 +406,7 @@ public class GenomeAnalysisEngine {
* @param argCollection The collection of arguments passed to the engine.
* @return The reads object providing reads source info.
*/
private Reads extractSourceInfo(Walker walker, GATKArgumentCollection argCollection) {
List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
if (argCollection.filterZeroMappingQualityReads != null && argCollection.filterZeroMappingQualityReads)
filters.add(new ZeroMappingQualityReadFilter());
for(String filterName: argCollection.readFilters)
filters.add(filterManager.createByName(filterName));
private Reads extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
return new Reads(argCollection.samFiles,
argCollection.strictnessLevel,
argCollection.downsampleFraction,

View File

@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileReader;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
/**
* User: hanna
* Date: May 14, 2009
@ -29,7 +30,7 @@ public class Reads {
private Double downsamplingFraction = null;
private Integer downsampleToCoverage = null;
private Boolean beSafe = null;
private List<SamRecordFilter> supplementalFilters = null;
private Collection<SamRecordFilter> supplementalFilters = null;
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
private boolean includeReadsWithDeletionAtLoci = false;
@ -91,7 +92,7 @@ public class Reads {
return beSafe;
}
public List<SamRecordFilter> getSupplementalFilters() {
public Collection<SamRecordFilter> getSupplementalFilters() {
return supplementalFilters;
}
@ -120,7 +121,7 @@ public class Reads {
Double downsampleFraction,
Integer downsampleCoverage,
Boolean beSafe,
List<SamRecordFilter> supplementalFilters,
Collection<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus,
boolean includeReadsWithDeletionAtLoci) {
this.readsFiles = samFiles;

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File;
import java.util.List;
import java.util.Collection;
/*
* Copyright (c) 2009 The Broad Institute
@ -419,7 +420,7 @@ public class SAMDataSource implements SimpleDataSource {
StingSAMIterator wrappedIterator,
Double downsamplingFraction,
Boolean beSafeP,
List<SamRecordFilter> supplementalFilters) {
Collection<SamRecordFilter> supplementalFilters) {
// NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end up throwing away
if (downsamplingFraction != null)

View File

@ -0,0 +1,22 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.cmdLine.Argument;
/**
 * Filters out reads whose length is strictly greater than a user-supplied maximum.
 * Reads whose length equals the maximum are kept.
 *
 * @author mhanna
 * @version 0.1
 */
public class MaxReadLengthFilter implements SamRecordFilter {
    /** Longest read length that is still accepted; supplied via the maxReadLength argument. */
    @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true)
    private int maxReadLength;

    /**
     * Decides whether the given read is too long.
     *
     * @param read the read to examine.
     * @return true when the read is longer than maxReadLength (i.e. it should be filtered out),
     *         false otherwise.
     */
    public boolean filterOut(SAMRecord read) {
        final int observedLength = read.getReadLength();
        return maxReadLength < observedLength;
    }
}

View File

@ -42,8 +42,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
SAMFileWriter outputBamFile = null;
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
Integer maxLength = null;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
String readGroup = null;
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
@ -65,10 +63,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(char[] ref, SAMRecord read) {
// check the length
if ( maxLength != null && read.getReadLength() > maxLength )
return false;
// check the read group
if ( readGroup != null ) {
SAMReadGroupRecord myReadGroup = read.getReadGroup();