diff --git a/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java new file mode 100644 index 000000000..eaeefdcb6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilter.java @@ -0,0 +1,65 @@ +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMReadGroupRecord; + +import java.util.Set; +import java.util.HashSet; + +/** + * Created by IntelliJ IDEA. + * User: asivache + * Date: Sep 21, 2009 + * Time: 2:54:23 PM + * To change this template use File | Settings | File Templates. + */ +public class PlatformUnitFilter implements SamRecordFilter { + // a hack: use static in order to be able to fill it with the data from command line at runtime + static private Set blackListedLanes = new HashSet(); + + public boolean filterOut(SAMRecord samRecord) { + + if ( blackListedLanes.size() == 0 ) return false; // no filters set, nothing to do + + Object pu_attr = samRecord.getAttribute("PU"); + + if ( pu_attr == null ) { + // no platform unit in the record, go get the header if we have at least read group + final String rgId = (String)samRecord.getAttribute("RG"); + if (rgId == null) return false; // we do not have read group either, can not filter + + SAMReadGroupRecord rgr = samRecord.getHeader().getReadGroup(rgId); + + pu_attr = rgr.getAttribute("PU") ; + } + if ( pu_attr == null ) return false; // could not get PU, forget about the filtering... + return blackListedLanes.contains((String)pu_attr); + } + + /** + * The argument is interpreted as a comma-separated list of lanes (platform units) to be filtered + * out. All the specified names will be registered with the filter and filterOut(r) for any SAMRecord r + * belonging to one of the specified lanes will thereafter return true. + * The names can be surrounded by additional spaces, the latters will be trimmed by this method. + * This method can be called multiple times to add more lanes. Re-registering the same lane again is safe. + * @param arg + */ + public static void setBlackListedLanes(String arg) { + String[] lanes = arg.split(","); + for ( int i = 0; i < lanes.length ; i++ ) { + blackListedLanes.add(lanes[i].trim()); + } + } + + /** + * Adds a single name of a lane (platform unit) to be filtered out by this filter. The name can be surrounded + * by spaces, the latters will be trimmed out. This method can be called multiple times to add more lanes. + * Re-registering the same lane again is safe. + * @param arg + */ + public static void addBlackListedLane(String arg) { + blackListedLanes.add(arg.trim()); + } + +} diff --git a/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilterHelper.java b/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilterHelper.java new file mode 100644 index 000000000..922d0fa4e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/PlatformUnitFilterHelper.java @@ -0,0 +1,63 @@ +package org.broadinstitute.sting.gatk.filters; + +import org.broadinstitute.sting.utils.xReadLines; +import org.broadinstitute.sting.utils.StingException; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.regex.Pattern; + +/** + * This is a utility class, its sole purpose is to populate PlatformUnitFilter with data. When a command line argument + * (@Argument) of the type PlatformUnitFilterHelper is declared in an application (walker), its constuctor + * PlatformUnitFilterHelper(String) automatically called by the argument system will parse its String argument + * and set up static fields of PlatformUnitFilter object. + * + * The String argument can be either a name of existing file, or a list of comma-separated lane (Platform Unit) names. + * First, the constructor will check if a file with specified name exists. If it does, then it is assumed that each line + * in the file contains one name of a lane (Platfor Unit) to filter out. If such file does not exist, then the argument is + * interpreted as a comma-separated list. Blank spaces around lane names are allowed in both cases and will be trimmed out. + * + * In other words, all it takes to request filtering out reads from specific lane(s) is + * + * 1) declare filter usage in the walker + * + * @ReadFilters({PlatformUnitFilter.class,...}) + * + * 2) specify the argument that will take the list of lanes to filter: + * + * @Argument(fullName="filterLanes", shortName="FL", doc="all specified lanes will be ignored", required=false) + * PlatformUnitFilterHelper dummy; + * + * After that, the walker can be invoked with "--filterLanes 302UBAAXX090508.8,302YAAAXX090427.8" argument. + * + * Created by IntelliJ IDEA. + * User: asivache + * Date: Sep 22, 2009 + * Time: 11:11:48 AM + * To change this template use File | Settings | File Templates. + */ +public class PlatformUnitFilterHelper { + + public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$"); + + public PlatformUnitFilterHelper(String arg) { + File f = new File(arg); + + if ( f.exists() ) { + try { + xReadLines reader = new xReadLines(f); + for ( String line : reader ) { + if ( EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty lines + PlatformUnitFilter.addBlackListedLane(line); // PlatformUnitFilter will trim the line as needed + } + } catch ( FileNotFoundException e) { throw new StingException("File " + f + " does not exist."); } // this should NEVER happen + return; + } + + // no such file, must be a comma-separated list: + + PlatformUnitFilter.setBlackListedLanes(arg); // PlatformUnitFilter will split on commas and trim as needed + + } +}