A very simple standalone filter for fooling around with the data: can extract only mapped or only unmapped reads, only reads with mapping quals > X, reads with average base qual > Y, reads with min base qual > Z, reads with edit distance from the ref > MIN and/or < MAX
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1420 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ebec0ec171
commit
2a01e71277
|
|
@ -0,0 +1,112 @@
|
||||||
|
package org.broadinstitute.sting.playground.tools;
|
||||||
|
|
||||||
|
import net.sf.picard.cmdline.CommandLineProgram;
|
||||||
|
import net.sf.picard.cmdline.Usage;
|
||||||
|
import net.sf.picard.cmdline.Option;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
import net.sf.samtools.SAMFileWriterFactory;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: asivache
|
||||||
|
* Date: Aug 11, 2009
|
||||||
|
* Time: 5:13:58 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
public class FilterReads extends CommandLineProgram {
|
||||||
|
@Usage(programVersion="1.0") public String USAGE = "Filters reads: the output file will contain only reads satisfying all the selected criteria";
|
||||||
|
@Option(shortName="I", doc="Input file (bam or sam) to extract reads from.",
|
||||||
|
optional=false) public File IN = null;
|
||||||
|
@Option(shortName="O",doc="Output file (bam or sam) to write extracted reads to.",
|
||||||
|
optional=false) public File OUT = null;
|
||||||
|
@Option(shortName="U", doc="Select only unmapped reads if true; only mapped reads if false; both if not specified.",
|
||||||
|
optional=true) public Boolean UNMAPPED = null;
|
||||||
|
@Option(shortName="MINQ", doc="Select only reads with minimum base quality across all bases at or above the specified value.",
|
||||||
|
optional=true) public Integer MIN_QUAL = 0;
|
||||||
|
@Option(shortName="AVQ", doc="Select only reads with average base quality at or above the specified value.",
|
||||||
|
optional=true) public Double AVERAGE_QUAL = 0.0;
|
||||||
|
@Option(shortName="MAPQ", doc="Select only reads with mapping quality at or above the specified value (does not affect unmapped reads, use 'U').",
|
||||||
|
optional=true) public Integer MAPPING_QUAL = 0;
|
||||||
|
@Option(shortName="MAXE",doc="Select only reads with edit distance from the reference at or below the specified value ('NM' tags must be present in the input file).",
|
||||||
|
optional = true) public Integer MAX_ERRORS = INFINITY;
|
||||||
|
@Option(shortName="MINE",doc="Select only reads with edit distance from the reference at or above the specified value ('NM' tags must be present in the input file).",
|
||||||
|
optional = true) public Integer MIN_ERRORS = 0;
|
||||||
|
|
||||||
|
private static int INFINITY = 1000000;
|
||||||
|
UnmappedFilter uFilter;
|
||||||
|
|
||||||
|
/** Required main method implementation. */
|
||||||
|
public static void main(final String[] argv) {
|
||||||
|
System.exit(new FilterReads().instanceMain(argv));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected int doWork() {
|
||||||
|
|
||||||
|
if ( UNMAPPED == null ) uFilter = UnmappedFilter.BOTH;
|
||||||
|
else {
|
||||||
|
if ( UNMAPPED.booleanValue() ) uFilter = UnmappedFilter.UNMAPPED;
|
||||||
|
else uFilter = UnmappedFilter.MAPPED;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
SAMFileReader inReader = new SAMFileReader(IN);
|
||||||
|
|
||||||
|
SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(inReader.getFileHeader(), true, OUT) ;
|
||||||
|
|
||||||
|
for ( SAMRecord read : inReader ) {
|
||||||
|
switch ( uFilter ) {
|
||||||
|
case UNMAPPED: if ( ! read.getReadUnmappedFlag() ) continue; break;
|
||||||
|
case MAPPED: if ( read.getReadUnmappedFlag() ) continue; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! read.getReadUnmappedFlag() ) {
|
||||||
|
// these filters are applicable only to mapped reads:
|
||||||
|
if ( read.getMappingQuality() < MAPPING_QUAL ) continue;
|
||||||
|
if ( MAX_ERRORS < INFINITY ) {
|
||||||
|
Object attr = read.getAttribute("NM");
|
||||||
|
if ( attr != null ) {
|
||||||
|
int nm = (Integer)attr;
|
||||||
|
if ( nm > MAX_ERRORS ) continue;
|
||||||
|
if ( nm < MIN_ERRORS ) continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( MIN_QUAL > 0 || AVERAGE_QUAL > 0 ) {
|
||||||
|
byte[] quals = read.getBaseQualities();
|
||||||
|
double av_q = 0.0;
|
||||||
|
boolean passed = true;
|
||||||
|
for ( int i = 0 ; i < quals.length ; i++ ) {
|
||||||
|
if ( quals[i] < MIN_QUAL ) {
|
||||||
|
passed = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
av_q += (double)quals[i];
|
||||||
|
}
|
||||||
|
if ( ! passed ) continue;
|
||||||
|
if ( av_q / read.getReadLength() < AVERAGE_QUAL ) continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
outWriter.addAlignment(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
inReader.close();
|
||||||
|
outWriter.close();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum UnmappedFilter {
|
||||||
|
UNMAPPED, MAPPED, BOTH
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue