Command-line arguments for SamReadFilters.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2014 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-11-10 23:36:17 +00:00
parent a13cbe1df0
commit 2e4782f202
6 changed files with 69 additions and 30 deletions

View File

@ -11,10 +11,9 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.List; import java.util.*;
import java.util.ArrayList;
import java.util.Collection; import net.sf.picard.filter.SamRecordFilter;
import java.util.Arrays;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -77,16 +76,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
protected Object executeGATK() { protected Object executeGATK() {
Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName()); Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName());
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),mWalker);
// load the arguments into the walkers // load the arguments into the walker / filters.
loadArgumentsIntoObject(mWalker); loadArgumentsIntoObject(mWalker);
for(SamRecordFilter filter: filters)
loadArgumentsIntoObject(filter);
// process any arguments that need a second pass // process any arguments that need a second pass
GATKArgumentCollection arguments = getArgumentCollection(); GATKArgumentCollection arguments = getArgumentCollection();
processArguments(arguments); processArguments(arguments);
// set the analysis name in the argument collection // set the analysis name in the argument collection
return GATKEngine.execute(arguments, mWalker); return GATKEngine.execute(arguments, mWalker, filters);
} }
/** /**
@ -118,7 +120,18 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
protected Class[] getArgumentSources() { protected Class[] getArgumentSources() {
// No walker info? No plugins. // No walker info? No plugins.
if (getAnalysisName() == null) return new Class[] {}; if (getAnalysisName() == null) return new Class[] {};
return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() };
Collection<Class> argumentSources = new ArrayList<Class>();
Walker walker = GATKEngine.getWalkerByName(getAnalysisName());
argumentSources.add(walker.getClass());
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),walker);
for(SamRecordFilter filter: filters)
argumentSources.add(filter.getClass());
Class[] argumentSourcesAsArray = new Class[argumentSources.size()];
return argumentSources.toArray(argumentSourcesAsArray);
} }
@Override @Override

View File

@ -123,7 +123,7 @@ public class GenomeAnalysisEngine {
* @param my_walker Walker to run over the dataset. Must not be null. * @param my_walker Walker to run over the dataset. Must not be null.
* @return the value of this traversal. * @return the value of this traversal.
*/ */
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker) { public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker, Collection<SamRecordFilter> filters) {
// validate our parameters // validate our parameters
if (args == null) { if (args == null) {
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null."); throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
@ -137,7 +137,7 @@ public class GenomeAnalysisEngine {
this.argCollection = args; this.argCollection = args;
// Prepare the data for traversal. // Prepare the data for traversal.
initializeDataSources(my_walker, argCollection); initializeDataSources(my_walker, filters, argCollection);
// our microscheduler, which is in charge of running everything // our microscheduler, which is in charge of running everything
MicroScheduler microScheduler = createMicroscheduler(my_walker); MicroScheduler microScheduler = createMicroscheduler(my_walker);
@ -203,10 +203,26 @@ public class GenomeAnalysisEngine {
return walkerManager.getName(walkerType); return walkerManager.getName(walkerType);
} }
private void initializeDataSources(Walker my_walker, GATKArgumentCollection argCollection) { /**
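* Gets the collection of filters to associate with the given walker. Will NOT initialize the engine with these filters;
* the caller must handle that directly.
* @param args Argument collection supplying the zero-mapping-quality flag and the names of any additional read filters.
* @param walker Walker to use when determining which filters to apply.
* @return An unmodifiable collection of the filters to apply for this walker.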
*/
public Collection<SamRecordFilter> getFiltersForWalker(GATKArgumentCollection args, Walker walker) {
    final Set<SamRecordFilter> collected = new HashSet<SamRecordFilter>();

    // Start with whatever read filters the walker itself is associated with.
    collected.addAll(WalkerManager.getReadFilters(walker, filterManager));

    // The flag is a nullable Boolean; only an explicit 'true' enables the filter.
    if (Boolean.TRUE.equals(args.filterZeroMappingQualityReads)) {
        collected.add(new ZeroMappingQualityReadFilter());
    }

    // Instantiate every additional filter named in the argument collection.
    for (String requestedFilterName : args.readFilters) {
        collected.add(filterManager.createByName(requestedFilterName));
    }

    // Callers get a read-only view; the set cannot be modified through it.
    return Collections.unmodifiableSet(collected);
}
private void initializeDataSources(Walker my_walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
validateSuppliedReadsAgainstWalker(my_walker, argCollection); validateSuppliedReadsAgainstWalker(my_walker, argCollection);
logger.info("Strictness is " + argCollection.strictnessLevel); logger.info("Strictness is " + argCollection.strictnessLevel);
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, argCollection)); readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, filters, argCollection));
validateSuppliedReferenceAgainstWalker(my_walker, argCollection); validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile); referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
@ -390,15 +406,7 @@ public class GenomeAnalysisEngine {
* @param argCollection The collection of arguments passed to the engine. * @param argCollection The collection of arguments passed to the engine.
* @return The reads object providing reads source info. * @return The reads object providing reads source info.
*/ */
private Reads extractSourceInfo(Walker walker, GATKArgumentCollection argCollection) { private Reads extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
if (argCollection.filterZeroMappingQualityReads != null && argCollection.filterZeroMappingQualityReads)
filters.add(new ZeroMappingQualityReadFilter());
for(String filterName: argCollection.readFilters)
filters.add(filterManager.createByName(filterName));
return new Reads(argCollection.samFiles, return new Reads(argCollection.samFiles,
argCollection.strictnessLevel, argCollection.strictnessLevel,
argCollection.downsampleFraction, argCollection.downsampleFraction,

View File

@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileReader;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Collection;
/** /**
* User: hanna * User: hanna
* Date: May 14, 2009 * Date: May 14, 2009
@ -29,7 +30,7 @@ public class Reads {
private Double downsamplingFraction = null; private Double downsamplingFraction = null;
private Integer downsampleToCoverage = null; private Integer downsampleToCoverage = null;
private Boolean beSafe = null; private Boolean beSafe = null;
private List<SamRecordFilter> supplementalFilters = null; private Collection<SamRecordFilter> supplementalFilters = null;
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
private boolean includeReadsWithDeletionAtLoci = false; private boolean includeReadsWithDeletionAtLoci = false;
@ -91,7 +92,7 @@ public class Reads {
return beSafe; return beSafe;
} }
public List<SamRecordFilter> getSupplementalFilters() { public Collection<SamRecordFilter> getSupplementalFilters() {
return supplementalFilters; return supplementalFilters;
} }
@ -120,7 +121,7 @@ public class Reads {
Double downsampleFraction, Double downsampleFraction,
Integer downsampleCoverage, Integer downsampleCoverage,
Boolean beSafe, Boolean beSafe,
List<SamRecordFilter> supplementalFilters, Collection<SamRecordFilter> supplementalFilters,
int maximumReadsAtLocus, int maximumReadsAtLocus,
boolean includeReadsWithDeletionAtLoci) { boolean includeReadsWithDeletionAtLoci) {
this.readsFiles = samFiles; this.readsFiles = samFiles;

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
import java.io.File; import java.io.File;
import java.util.List; import java.util.List;
import java.util.Collection;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -419,7 +420,7 @@ public class SAMDataSource implements SimpleDataSource {
StingSAMIterator wrappedIterator, StingSAMIterator wrappedIterator,
Double downsamplingFraction, Double downsamplingFraction,
Boolean beSafeP, Boolean beSafeP,
List<SamRecordFilter> supplementalFilters) { Collection<SamRecordFilter> supplementalFilters) {
// NOTE: this (and other filtering) should be done before on-the-fly sorting // NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end up throwing away // as there is no reason to sort something that we will end up throwing away
if (downsamplingFraction != null) if (downsamplingFraction != null)

View File

@ -0,0 +1,22 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.cmdLine.Argument;
/**
 * Read filter that discards reads longer than a user-supplied maximum length.
 * A read whose length is exactly equal to the maximum is kept; only reads
 * strictly longer than the maximum are filtered out.
 *
 * @author mhanna
 * @version 0.1
 */
public class MaxReadLengthFilter implements SamRecordFilter {
    /** Longest read length that is still kept; bound to the required maxReadLength argument. */
    @Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true)
    private int maxReadLength;

    /**
     * Decides whether the given read should be discarded.
     *
     * @param read the read to test.
     * @return true if the read's length is strictly greater than maxReadLength
     *         (the read is filtered out), false otherwise (the read is kept).
     */
    public boolean filterOut(SAMRecord read) {
        // Strict comparison: a read of exactly maxReadLength bases passes the filter.
        return read.getReadLength() > maxReadLength;
    }
}

View File

@ -42,8 +42,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */ /** an optional argument to dump the reads out to a BAM file */
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false) @Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
SAMFileWriter outputBamFile = null; SAMFileWriter outputBamFile = null;
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
Integer maxLength = null;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false) @Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
String readGroup = null; String readGroup = null;
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false) @Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
@ -65,10 +63,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @return true if the read passes the filter, false if it doesn't * @return true if the read passes the filter, false if it doesn't
*/ */
public boolean filter(char[] ref, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
// check the length
if ( maxLength != null && read.getReadLength() > maxLength )
return false;
// check the read group // check the read group
if ( readGroup != null ) { if ( readGroup != null ) {
SAMReadGroupRecord myReadGroup = read.getReadGroup(); SAMReadGroupRecord myReadGroup = read.getReadGroup();