Command-line arguments for SamReadFilters.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2014 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a13cbe1df0
commit
2e4782f202
|
|
@ -11,10 +11,9 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Arrays;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -77,16 +76,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
|
||||
protected Object executeGATK() {
|
||||
Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName());
|
||||
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),mWalker);
|
||||
|
||||
// load the arguments into the walkers
|
||||
// load the arguments into the walker / filters.
|
||||
loadArgumentsIntoObject(mWalker);
|
||||
for(SamRecordFilter filter: filters)
|
||||
loadArgumentsIntoObject(filter);
|
||||
|
||||
// process any arguments that need a second pass
|
||||
GATKArgumentCollection arguments = getArgumentCollection();
|
||||
processArguments(arguments);
|
||||
|
||||
// set the analysis name in the argument collection
|
||||
return GATKEngine.execute(arguments, mWalker);
|
||||
return GATKEngine.execute(arguments, mWalker, filters);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -118,7 +120,18 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
protected Class[] getArgumentSources() {
|
||||
// No walker info? No plugins.
|
||||
if (getAnalysisName() == null) return new Class[] {};
|
||||
return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() };
|
||||
|
||||
Collection<Class> argumentSources = new ArrayList<Class>();
|
||||
|
||||
Walker walker = GATKEngine.getWalkerByName(getAnalysisName());
|
||||
argumentSources.add(walker.getClass());
|
||||
|
||||
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),walker);
|
||||
for(SamRecordFilter filter: filters)
|
||||
argumentSources.add(filter.getClass());
|
||||
|
||||
Class[] argumentSourcesAsArray = new Class[argumentSources.size()];
|
||||
return argumentSources.toArray(argumentSourcesAsArray);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ public class GenomeAnalysisEngine {
|
|||
* @param my_walker Walker to run over the dataset. Must not be null.
|
||||
* @return the value of this traversal.
|
||||
*/
|
||||
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker) {
|
||||
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker, Collection<SamRecordFilter> filters) {
|
||||
// validate our parameters
|
||||
if (args == null) {
|
||||
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
|
||||
|
|
@ -137,7 +137,7 @@ public class GenomeAnalysisEngine {
|
|||
this.argCollection = args;
|
||||
|
||||
// Prepare the data for traversal.
|
||||
initializeDataSources(my_walker, argCollection);
|
||||
initializeDataSources(my_walker, filters, argCollection);
|
||||
|
||||
// our microscheduler, which is in charge of running everything
|
||||
MicroScheduler microScheduler = createMicroscheduler(my_walker);
|
||||
|
|
@ -203,10 +203,26 @@ public class GenomeAnalysisEngine {
|
|||
return walkerManager.getName(walkerType);
|
||||
}
|
||||
|
||||
private void initializeDataSources(Walker my_walker, GATKArgumentCollection argCollection) {
|
||||
/**
|
||||
* Gets a list of the filters to associate with the given walker. Will NOT initialize the engine with these filters;
|
||||
* the caller must handle that directly.
|
||||
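* @param args Argument collection consulted for the zero-mapping-quality flag and the read filters requested by name.
* @param walker Walker to use when determining which filters to apply.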
|
||||
* @return An unmodifiable collection of the filters to associate with the walker.
|
||||
*/
|
||||
public Collection<SamRecordFilter> getFiltersForWalker(GATKArgumentCollection args, Walker walker) {
|
||||
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
|
||||
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
|
||||
if (args.filterZeroMappingQualityReads != null && args.filterZeroMappingQualityReads)
|
||||
filters.add(new ZeroMappingQualityReadFilter());
|
||||
for(String filterName: args.readFilters)
|
||||
filters.add(filterManager.createByName(filterName));
|
||||
return Collections.unmodifiableSet(filters);
|
||||
}
|
||||
|
||||
private void initializeDataSources(Walker my_walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
|
||||
validateSuppliedReadsAgainstWalker(my_walker, argCollection);
|
||||
logger.info("Strictness is " + argCollection.strictnessLevel);
|
||||
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, argCollection));
|
||||
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, filters, argCollection));
|
||||
|
||||
validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
|
||||
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
||||
|
|
@ -390,15 +406,7 @@ public class GenomeAnalysisEngine {
|
|||
* @param argCollection The collection of arguments passed to the engine.
|
||||
* @return The reads object providing reads source info.
|
||||
*/
|
||||
private Reads extractSourceInfo(Walker walker, GATKArgumentCollection argCollection) {
|
||||
List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
|
||||
|
||||
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
|
||||
if (argCollection.filterZeroMappingQualityReads != null && argCollection.filterZeroMappingQualityReads)
|
||||
filters.add(new ZeroMappingQualityReadFilter());
|
||||
for(String filterName: argCollection.readFilters)
|
||||
filters.add(filterManager.createByName(filterName));
|
||||
|
||||
private Reads extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
|
||||
return new Reads(argCollection.samFiles,
|
||||
argCollection.strictnessLevel,
|
||||
argCollection.downsampleFraction,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileReader;
|
|||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 14, 2009
|
||||
|
|
@ -29,7 +30,7 @@ public class Reads {
|
|||
private Double downsamplingFraction = null;
|
||||
private Integer downsampleToCoverage = null;
|
||||
private Boolean beSafe = null;
|
||||
private List<SamRecordFilter> supplementalFilters = null;
|
||||
private Collection<SamRecordFilter> supplementalFilters = null;
|
||||
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
||||
private boolean includeReadsWithDeletionAtLoci = false;
|
||||
|
||||
|
|
@ -91,7 +92,7 @@ public class Reads {
|
|||
return beSafe;
|
||||
}
|
||||
|
||||
public List<SamRecordFilter> getSupplementalFilters() {
|
||||
public Collection<SamRecordFilter> getSupplementalFilters() {
|
||||
return supplementalFilters;
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +121,7 @@ public class Reads {
|
|||
Double downsampleFraction,
|
||||
Integer downsampleCoverage,
|
||||
Boolean beSafe,
|
||||
List<SamRecordFilter> supplementalFilters,
|
||||
Collection<SamRecordFilter> supplementalFilters,
|
||||
int maximumReadsAtLocus,
|
||||
boolean includeReadsWithDeletionAtLoci) {
|
||||
this.readsFiles = samFiles;
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Collection;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -419,7 +420,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
StingSAMIterator wrappedIterator,
|
||||
Double downsamplingFraction,
|
||||
Boolean beSafeP,
|
||||
List<SamRecordFilter> supplementalFilters) {
|
||||
Collection<SamRecordFilter> supplementalFilters) {
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
// as there is no reason to sort something that we will end up throwing away
|
||||
if (downsamplingFraction != null)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,22 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
/**
|
||||
* Filters out reads whose length is greater than the specified maximum read length.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class MaxReadLengthFilter implements SamRecordFilter {
|
||||
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true)
|
||||
private int maxReadLength;
|
||||
|
||||
public boolean filterOut(SAMRecord read) {
|
||||
// check the length
|
||||
return read.getReadLength() > maxReadLength;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -42,8 +42,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
/** an optional argument to dump the reads out to a BAM file */
|
||||
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
||||
SAMFileWriter outputBamFile = null;
|
||||
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
|
||||
Integer maxLength = null;
|
||||
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
|
||||
String readGroup = null;
|
||||
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
|
||||
|
|
@ -65,10 +63,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
// check the length
|
||||
if ( maxLength != null && read.getReadLength() > maxLength )
|
||||
return false;
|
||||
|
||||
// check the read group
|
||||
if ( readGroup != null ) {
|
||||
SAMReadGroupRecord myReadGroup = read.getReadGroup();
|
||||
|
|
|
|||
Loading…
Reference in New Issue