Filter that discards reads from specific lanes, plus a companion helper class that makes blacklisting a set of lanes from the GATK command line a one-liner.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1681 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
db9390811a
commit
57d31b8e9b
|
|
@ -0,0 +1,65 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Sep 21, 2009
|
||||
* Time: 2:54:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PlatformUnitFilter implements SamRecordFilter {
|
||||
// a hack: use static in order to be able to fill it with the data from command line at runtime
|
||||
static private Set<String> blackListedLanes = new HashSet<String>();
|
||||
|
||||
public boolean filterOut(SAMRecord samRecord) {
|
||||
|
||||
if ( blackListedLanes.size() == 0 ) return false; // no filters set, nothing to do
|
||||
|
||||
Object pu_attr = samRecord.getAttribute("PU");
|
||||
|
||||
if ( pu_attr == null ) {
|
||||
// no platform unit in the record, go get the header if we have at least read group
|
||||
final String rgId = (String)samRecord.getAttribute("RG");
|
||||
if (rgId == null) return false; // we do not have read group either, can not filter
|
||||
|
||||
SAMReadGroupRecord rgr = samRecord.getHeader().getReadGroup(rgId);
|
||||
|
||||
pu_attr = rgr.getAttribute("PU") ;
|
||||
}
|
||||
if ( pu_attr == null ) return false; // could not get PU, forget about the filtering...
|
||||
return blackListedLanes.contains((String)pu_attr);
|
||||
}
|
||||
|
||||
/**
|
||||
* The argument is interpreted as a comma-separated list of lanes (platform units) to be filtered
|
||||
* out. All the specified names will be registered with the filter and filterOut(r) for any SAMRecord r
|
||||
* belonging to one of the specified lanes will thereafter return true.
|
||||
* The names can be surrounded by additional spaces, the latters will be trimmed by this method.
|
||||
* This method can be called multiple times to add more lanes. Re-registering the same lane again is safe.
|
||||
* @param arg
|
||||
*/
|
||||
public static void setBlackListedLanes(String arg) {
|
||||
String[] lanes = arg.split(",");
|
||||
for ( int i = 0; i < lanes.length ; i++ ) {
|
||||
blackListedLanes.add(lanes[i].trim());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a single name of a lane (platform unit) to be filtered out by this filter. The name can be surrounded
|
||||
* by spaces, the latters will be trimmed out. This method can be called multiple times to add more lanes.
|
||||
* Re-registering the same lane again is safe.
|
||||
* @param arg
|
||||
*/
|
||||
public static void addBlackListedLane(String arg) {
|
||||
blackListedLanes.add(arg.trim());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* This is a utility class, its sole purpose is to populate PlatformUnitFilter with data. When a command line argument
|
||||
* (@Argument) of the type PlatformUnitFilterHelper is declared in an application (walker), its constructor
|
||||
* PlatformUnitFilterHelper(String) automatically called by the argument system will parse its String argument
|
||||
* and set up static fields of PlatformUnitFilter object.
|
||||
*
|
||||
* The String argument can be either a name of existing file, or a list of comma-separated lane (Platform Unit) names.
|
||||
* First, the constructor will check if a file with specified name exists. If it does, then it is assumed that each line
|
||||
* in the file contains one name of a lane (Platform Unit) to filter out. If such file does not exist, then the argument is
|
||||
* interpreted as a comma-separated list. Blank spaces around lane names are allowed in both cases and will be trimmed out.
|
||||
*
|
||||
* In other words, all it takes to request filtering out reads from specific lane(s) is
|
||||
*
|
||||
* 1) declare filter usage in the walker
|
||||
*
|
||||
* @ReadFilters({PlatformUnitFilter.class,...})
|
||||
*
|
||||
* 2) specify the argument that will take the list of lanes to filter:
|
||||
*
|
||||
* @Argument(fullName="filterLanes", shortName="FL", doc="all specified lanes will be ignored", required=false)
|
||||
* PlatformUnitFilterHelper dummy;
|
||||
*
|
||||
* After that, the walker can be invoked with "--filterLanes 302UBAAXX090508.8,302YAAAXX090427.8" argument.
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Sep 22, 2009
|
||||
* Time: 11:11:48 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PlatformUnitFilterHelper {
|
||||
|
||||
public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
|
||||
|
||||
public PlatformUnitFilterHelper(String arg) {
|
||||
File f = new File(arg);
|
||||
|
||||
if ( f.exists() ) {
|
||||
try {
|
||||
xReadLines reader = new xReadLines(f);
|
||||
for ( String line : reader ) {
|
||||
if ( EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty lines
|
||||
PlatformUnitFilter.addBlackListedLane(line); // PlatformUnitFilter will trim the line as needed
|
||||
}
|
||||
} catch ( FileNotFoundException e) { throw new StingException("File " + f + " does not exist."); } // this should NEVER happen
|
||||
return;
|
||||
}
|
||||
|
||||
// no such file, must be a comma-separated list:
|
||||
|
||||
PlatformUnitFilter.setBlackListedLanes(arg); // PlatformUnitFilter will split on commas and trim as needed
|
||||
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue