Command-line arguments for SamReadFilters.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2014 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a13cbe1df0
commit
2e4782f202
|
|
@ -11,10 +11,9 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -77,16 +76,19 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
|
|
||||||
protected Object executeGATK() {
|
protected Object executeGATK() {
|
||||||
Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName());
|
Walker<?,?> mWalker = GATKEngine.getWalkerByName(getAnalysisName());
|
||||||
|
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),mWalker);
|
||||||
|
|
||||||
// load the arguments into the walkers
|
// load the arguments into the walker / filters.
|
||||||
loadArgumentsIntoObject(mWalker);
|
loadArgumentsIntoObject(mWalker);
|
||||||
|
for(SamRecordFilter filter: filters)
|
||||||
|
loadArgumentsIntoObject(filter);
|
||||||
|
|
||||||
// process any arguments that need a second pass
|
// process any arguments that need a second pass
|
||||||
GATKArgumentCollection arguments = getArgumentCollection();
|
GATKArgumentCollection arguments = getArgumentCollection();
|
||||||
processArguments(arguments);
|
processArguments(arguments);
|
||||||
|
|
||||||
// set the analysis name in the argument collection
|
// set the analysis name in the argument collection
|
||||||
return GATKEngine.execute(arguments, mWalker);
|
return GATKEngine.execute(arguments, mWalker, filters);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -118,7 +120,18 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
protected Class[] getArgumentSources() {
|
protected Class[] getArgumentSources() {
|
||||||
// No walker info? No plugins.
|
// No walker info? No plugins.
|
||||||
if (getAnalysisName() == null) return new Class[] {};
|
if (getAnalysisName() == null) return new Class[] {};
|
||||||
return new Class[] { GATKEngine.getWalkerByName(getAnalysisName()).getClass() };
|
|
||||||
|
Collection<Class> argumentSources = new ArrayList<Class>();
|
||||||
|
|
||||||
|
Walker walker = GATKEngine.getWalkerByName(getAnalysisName());
|
||||||
|
argumentSources.add(walker.getClass());
|
||||||
|
|
||||||
|
Collection<SamRecordFilter> filters = GATKEngine.getFiltersForWalker(getArgumentCollection(),walker);
|
||||||
|
for(SamRecordFilter filter: filters)
|
||||||
|
argumentSources.add(filter.getClass());
|
||||||
|
|
||||||
|
Class[] argumentSourcesAsArray = new Class[argumentSources.size()];
|
||||||
|
return argumentSources.toArray(argumentSourcesAsArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -123,7 +123,7 @@ public class GenomeAnalysisEngine {
|
||||||
* @param my_walker Walker to run over the dataset. Must not be null.
|
* @param my_walker Walker to run over the dataset. Must not be null.
|
||||||
* @return the value of this traversal.
|
* @return the value of this traversal.
|
||||||
*/
|
*/
|
||||||
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker) {
|
public Object execute(GATKArgumentCollection args, Walker<?, ?> my_walker, Collection<SamRecordFilter> filters) {
|
||||||
// validate our parameters
|
// validate our parameters
|
||||||
if (args == null) {
|
if (args == null) {
|
||||||
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
|
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
|
||||||
|
|
@ -137,7 +137,7 @@ public class GenomeAnalysisEngine {
|
||||||
this.argCollection = args;
|
this.argCollection = args;
|
||||||
|
|
||||||
// Prepare the data for traversal.
|
// Prepare the data for traversal.
|
||||||
initializeDataSources(my_walker, argCollection);
|
initializeDataSources(my_walker, filters, argCollection);
|
||||||
|
|
||||||
// our microscheduler, which is in charge of running everything
|
// our microscheduler, which is in charge of running everything
|
||||||
MicroScheduler microScheduler = createMicroscheduler(my_walker);
|
MicroScheduler microScheduler = createMicroscheduler(my_walker);
|
||||||
|
|
@ -203,10 +203,26 @@ public class GenomeAnalysisEngine {
|
||||||
return walkerManager.getName(walkerType);
|
return walkerManager.getName(walkerType);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initializeDataSources(Walker my_walker, GATKArgumentCollection argCollection) {
|
/**
|
||||||
|
* Gets a list of the filters to associate with the given walker. Will NOT initialize the engine with these filters;
|
||||||
|
* the caller must handle that directly.
|
||||||
|
* @param walker Walker to use when determining which filters to apply.
|
||||||
|
* @return A collection of available filters.
|
||||||
|
*/
|
||||||
|
public Collection<SamRecordFilter> getFiltersForWalker(GATKArgumentCollection args, Walker walker) {
|
||||||
|
Set<SamRecordFilter> filters = new HashSet<SamRecordFilter>();
|
||||||
|
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
|
||||||
|
if (args.filterZeroMappingQualityReads != null && args.filterZeroMappingQualityReads)
|
||||||
|
filters.add(new ZeroMappingQualityReadFilter());
|
||||||
|
for(String filterName: args.readFilters)
|
||||||
|
filters.add(filterManager.createByName(filterName));
|
||||||
|
return Collections.unmodifiableSet(filters);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeDataSources(Walker my_walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
|
||||||
validateSuppliedReadsAgainstWalker(my_walker, argCollection);
|
validateSuppliedReadsAgainstWalker(my_walker, argCollection);
|
||||||
logger.info("Strictness is " + argCollection.strictnessLevel);
|
logger.info("Strictness is " + argCollection.strictnessLevel);
|
||||||
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, argCollection));
|
readsDataSource = createReadsDataSource(extractSourceInfo(my_walker, filters, argCollection));
|
||||||
|
|
||||||
validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
|
validateSuppliedReferenceAgainstWalker(my_walker, argCollection);
|
||||||
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
referenceDataSource = openReferenceSequenceFile(argCollection.referenceFile);
|
||||||
|
|
@ -390,15 +406,7 @@ public class GenomeAnalysisEngine {
|
||||||
* @param argCollection The collection of arguments passed to the engine.
|
* @param argCollection The collection of arguments passed to the engine.
|
||||||
* @return The reads object providing reads source info.
|
* @return The reads object providing reads source info.
|
||||||
*/
|
*/
|
||||||
private Reads extractSourceInfo(Walker walker, GATKArgumentCollection argCollection) {
|
private Reads extractSourceInfo(Walker walker, Collection<SamRecordFilter> filters, GATKArgumentCollection argCollection) {
|
||||||
List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
|
|
||||||
|
|
||||||
filters.addAll(WalkerManager.getReadFilters(walker,filterManager));
|
|
||||||
if (argCollection.filterZeroMappingQualityReads != null && argCollection.filterZeroMappingQualityReads)
|
|
||||||
filters.add(new ZeroMappingQualityReadFilter());
|
|
||||||
for(String filterName: argCollection.readFilters)
|
|
||||||
filters.add(filterManager.createByName(filterName));
|
|
||||||
|
|
||||||
return new Reads(argCollection.samFiles,
|
return new Reads(argCollection.samFiles,
|
||||||
argCollection.strictnessLevel,
|
argCollection.strictnessLevel,
|
||||||
argCollection.downsampleFraction,
|
argCollection.downsampleFraction,
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: May 14, 2009
|
* Date: May 14, 2009
|
||||||
|
|
@ -29,7 +30,7 @@ public class Reads {
|
||||||
private Double downsamplingFraction = null;
|
private Double downsamplingFraction = null;
|
||||||
private Integer downsampleToCoverage = null;
|
private Integer downsampleToCoverage = null;
|
||||||
private Boolean beSafe = null;
|
private Boolean beSafe = null;
|
||||||
private List<SamRecordFilter> supplementalFilters = null;
|
private Collection<SamRecordFilter> supplementalFilters = null;
|
||||||
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
private int maximumReadsAtLocus = Integer.MAX_VALUE; // this should always be set, so we'll default it MAX_INT
|
||||||
private boolean includeReadsWithDeletionAtLoci = false;
|
private boolean includeReadsWithDeletionAtLoci = false;
|
||||||
|
|
||||||
|
|
@ -91,7 +92,7 @@ public class Reads {
|
||||||
return beSafe;
|
return beSafe;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<SamRecordFilter> getSupplementalFilters() {
|
public Collection<SamRecordFilter> getSupplementalFilters() {
|
||||||
return supplementalFilters;
|
return supplementalFilters;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -120,7 +121,7 @@ public class Reads {
|
||||||
Double downsampleFraction,
|
Double downsampleFraction,
|
||||||
Integer downsampleCoverage,
|
Integer downsampleCoverage,
|
||||||
Boolean beSafe,
|
Boolean beSafe,
|
||||||
List<SamRecordFilter> supplementalFilters,
|
Collection<SamRecordFilter> supplementalFilters,
|
||||||
int maximumReadsAtLocus,
|
int maximumReadsAtLocus,
|
||||||
boolean includeReadsWithDeletionAtLoci) {
|
boolean includeReadsWithDeletionAtLoci) {
|
||||||
this.readsFiles = samFiles;
|
this.readsFiles = samFiles;
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -419,7 +420,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
StingSAMIterator wrappedIterator,
|
StingSAMIterator wrappedIterator,
|
||||||
Double downsamplingFraction,
|
Double downsamplingFraction,
|
||||||
Boolean beSafeP,
|
Boolean beSafeP,
|
||||||
List<SamRecordFilter> supplementalFilters) {
|
Collection<SamRecordFilter> supplementalFilters) {
|
||||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||||
// as there is no reason to sort something that we will end up throwing away
|
// as there is no reason to sort something that we will end up throwing away
|
||||||
if (downsamplingFraction != null)
|
if (downsamplingFraction != null)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
package org.broadinstitute.sting.gatk.filters;
|
||||||
|
|
||||||
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters out reads whose length is greater than the specified maximum read length.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class MaxReadLengthFilter implements SamRecordFilter {
|
||||||
|
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required=true)
|
||||||
|
private int maxReadLength;
|
||||||
|
|
||||||
|
public boolean filterOut(SAMRecord read) {
|
||||||
|
// check the length
|
||||||
|
return read.getReadLength() > maxReadLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -42,8 +42,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
/** an optional argument to dump the reads out to a BAM file */
|
/** an optional argument to dump the reads out to a BAM file */
|
||||||
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
@Argument(fullName = "outputBamFile", shortName = "of", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
||||||
SAMFileWriter outputBamFile = null;
|
SAMFileWriter outputBamFile = null;
|
||||||
@Argument(fullName = "maxReadLength", shortName = "maxRead", doc="Discard reads with length greater than the specified value", required = false)
|
|
||||||
Integer maxLength = null;
|
|
||||||
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
|
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Discard reads not belonging to the specified read group", required = false)
|
||||||
String readGroup = null;
|
String readGroup = null;
|
||||||
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
|
@Argument(fullName = "platform", shortName = "platform", doc="Discard reads not generated by the specified platform", required = false)
|
||||||
|
|
@ -65,10 +63,6 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
* @return true if the read passes the filter, false if it doesn't
|
* @return true if the read passes the filter, false if it doesn't
|
||||||
*/
|
*/
|
||||||
public boolean filter(char[] ref, SAMRecord read) {
|
public boolean filter(char[] ref, SAMRecord read) {
|
||||||
// check the length
|
|
||||||
if ( maxLength != null && read.getReadLength() > maxLength )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// check the read group
|
// check the read group
|
||||||
if ( readGroup != null ) {
|
if ( readGroup != null ) {
|
||||||
SAMReadGroupRecord myReadGroup = read.getReadGroup();
|
SAMReadGroupRecord myReadGroup = read.getReadGroup();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue