Filter that discards reads from specific lanes, plus a companion helper class that makes blacklisting a set of lanes from the GATK command line a one-liner.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1681 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2009-09-22 16:46:06 +00:00
parent db9390811a
commit 57d31b8e9b
2 changed files with 128 additions and 0 deletions

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
import java.util.Set;
import java.util.HashSet;
/**
 * Read filter that discards reads originating from black-listed lanes (platform units).
 *
 * The platform unit of a read is taken from the record's own "PU" attribute; when the record
 * does not carry one, it is looked up through the record's read group ("RG" attribute) in the
 * file header. Reads whose platform unit can not be determined are never filtered out.
 *
 * The black list is a static (class-wide) set, so that it can be populated from the command
 * line at runtime; see setBlackListedLanes(String) and addBlackListedLane(String).
 *
 * User: asivache
 * Date: Sep 21, 2009
 * Time: 2:54:23 PM
 */
public class PlatformUnitFilter implements SamRecordFilter {
    // a hack: use static in order to be able to fill it with the data from command line at runtime
    private static final Set<String> blackListedLanes = new HashSet<String>();

    /**
     * Decides whether the read belongs to one of the black-listed lanes.
     *
     * @param samRecord the read to examine
     * @return true if the platform unit of the read is black-listed; false if no lanes are registered,
     *         if the platform unit can not be determined, or if it is not on the black list
     */
    public boolean filterOut(SAMRecord samRecord) {
        if ( blackListedLanes.isEmpty() ) {
            return false; // no filters set, nothing to do
        }

        Object platformUnit = samRecord.getAttribute("PU");

        if ( platformUnit == null ) {
            // no platform unit in the record itself, go through the read group in the header

            final Object readGroupId = samRecord.getAttribute("RG");
            if ( !(readGroupId instanceof String) ) {
                return false; // no (usable) read group either, can not filter
            }

            if ( samRecord.getHeader() == null ) {
                return false; // record is not attached to a header, nowhere to look the read group up
            }

            final SAMReadGroupRecord readGroup = samRecord.getHeader().getReadGroup((String)readGroupId);
            if ( readGroup == null ) {
                return false; // read refers to a read group the header does not declare
            }

            platformUnit = readGroup.getAttribute("PU");
        }

        if ( platformUnit == null ) {
            return false; // could not get PU, forget about the filtering...
        }

        // contains() takes an Object: a non-String attribute simply never matches
        return blackListedLanes.contains(platformUnit);
    }

    /**
     * The argument is interpreted as a comma-separated list of lanes (platform units) to be filtered
     * out. All the specified names will be registered with the filter and filterOut(r) for any SAMRecord r
     * belonging to one of the specified lanes will thereafter return true.
     * The names can be surrounded by additional spaces, which will be trimmed by this method; names
     * that are empty after trimming are ignored.
     * This method can be called multiple times to add more lanes. Re-registering the same lane again is safe.
     *
     * @param arg comma-separated list of lane (platform unit) names
     */
    public static void setBlackListedLanes(String arg) {
        for ( String lane : arg.split(",") ) {
            addBlackListedLane(lane);
        }
    }

    /**
     * Adds a single name of a lane (platform unit) to be filtered out by this filter. The name can be surrounded
     * by spaces, which will be trimmed. A name that is empty after trimming is ignored, so that stray
     * separators or blank input do not register a nameless lane.
     * This method can be called multiple times to add more lanes. Re-registering the same lane again is safe.
     *
     * @param arg name of the lane (platform unit) to black-list
     */
    public static void addBlackListedLane(String arg) {
        final String lane = arg.trim();
        if ( lane.length() == 0 ) {
            return;
        }
        blackListedLanes.add(lane);
    }
}

View File

@ -0,0 +1,63 @@
package org.broadinstitute.sting.gatk.filters;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.regex.Pattern;
/**
 * This is a utility class, its sole purpose is to populate PlatformUnitFilter with data. When a command line argument
 * (annotated with {@code @Argument}) of the type PlatformUnitFilterHelper is declared in an application (walker),
 * its constructor PlatformUnitFilterHelper(String), automatically called by the argument system, will parse its
 * String argument and set up the static fields of PlatformUnitFilter.
 *
 * The String argument can be either the name of an existing file, or a list of comma-separated lane (Platform Unit)
 * names. First, the constructor checks whether a file with the specified name exists. If it does, each line
 * in the file is assumed to contain one name of a lane (Platform Unit) to filter out; blank lines are skipped.
 * If no such file exists, the argument is interpreted as a comma-separated list. Blank spaces around lane names
 * are allowed in both cases and will be trimmed.
 *
 * In other words, all it takes to request filtering out reads from specific lane(s) is
 *
 * 1) declare filter usage in the walker
 *
 * <pre>{@code @ReadFilters({PlatformUnitFilter.class,...}) }</pre>
 *
 * 2) specify the argument that will take the list of lanes to filter:
 *
 * <pre>{@code @Argument(fullName="filterLanes", shortName="FL", doc="all specified lanes will be ignored", required=false)
 * PlatformUnitFilterHelper dummy; }</pre>
 *
 * After that, the walker can be invoked with "--filterLanes 302UBAAXX090508.8,302YAAAXX090427.8" argument.
 *
 * User: asivache
 * Date: Sep 22, 2009
 * Time: 11:11:48 AM
 */
public class PlatformUnitFilterHelper {
    /** Matches lines that are empty or consist of whitespace only. */
    public static final Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");

    /**
     * Registers the lanes described by the argument with PlatformUnitFilter.
     *
     * @param arg either the name of an existing file listing one lane (platform unit) per line,
     *            or a comma-separated list of lane names
     * @throws StingException if the file exists but can not be opened for reading
     */
    public PlatformUnitFilterHelper(String arg) {
        File f = new File(arg);

        if ( !f.exists() ) {
            // no such file, must be a comma-separated list:
            PlatformUnitFilter.setBlackListedLanes(arg); // PlatformUnitFilter will split on commas and trim as needed
            return;
        }

        try {
            // NOTE(review): the reader is never closed here; whether xReadLines offers a close()
            // method is not visible from this file -- confirm and release it if it does.
            xReadLines reader = new xReadLines(f);
            for ( String line : reader ) {
                if ( EMPTYLINE_PATTERN.matcher(line).matches() ) {
                    continue; // skip empty lines
                }
                PlatformUnitFilter.addBlackListedLane(line); // PlatformUnitFilter will trim the line as needed
            }
        } catch ( FileNotFoundException e ) {
            // The file was just seen to exist, so this means it could not be opened (e.g. it is a
            // directory or is not readable); report that and keep the original exception as the cause.
            throw new StingException("File " + f + " exists but could not be opened for reading: " + e.getMessage(), e);
        }
    }
}