Added new ReadFilter that allows users to specifically reassign one single mapping quality to a different value. Useful for TopHat and other RNA-seq software users.

This commit is contained in:
Geraldine Van der Auwera 2013-02-19 16:53:14 -05:00
parent 76810465aa
commit e674b4a524
1 changed files with 88 additions and 0 deletions

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* A read filter (transformer) that changes a given read mapping quality to a different value.
*
* <p>
* This 'filter' will change a certain read mapping quality to a different value without affecting reads that
* have other mapping qualities. This is intended primarily for users of RNA-Seq data handling programs such
* as TopHat, which use MAPQ = 255 to designate uniquely aligned reads. According to convention, 255 normally
* designates "unknown" quality, and most GATK tools automatically ignore such reads. By reassigning a different
* mapping quality to those specific reads, users of TopHat and other tools can circumvent this problem without
* affecting the rest of their dataset.
* </p>
*
* <p>
* This differs from the ReassignMappingQuality filter by its selectivity -- only one mapping quality is targeted.
* ReassignMappingQuality will change ALL mapping qualities to a single one, and is typically used for datasets
* that have no assigned mapping qualities.
* </p>
*
*
* <h2>Input</h2>
* <p>
* BAM file(s)
* </p>
*
*
* <h2>Output</h2>
* <p>
* BAM file(s) with one read mapping quality selectively reassigned as desired
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -rf ReassignOneMappingQuality
* -RMQF 255
* -RMQT 60
* </pre>
*
* @author vdauwera
* @since 2/19/13
*/
public class ReassignOneMappingQualityFilter extends ReadFilter {

    /**
     * Mapping quality value to look for. Only reads whose mapping quality is exactly
     * equal to this value are modified. Defaults to 255.
     */
    @Argument(fullName = "reassign_mapping_quality_from", shortName = "RMQF", doc = "Original mapping quality", required = false)
    public int reassignMappingQualityFrom = 255;

    /**
     * Mapping quality value written to every read that matches
     * {@link #reassignMappingQualityFrom}. Defaults to 60.
     */
    @Argument(fullName = "reassign_mapping_quality_to", shortName = "RMQT", doc = "Desired mapping quality", required = false)
    public int reassignMappingQualityTo = 60;

    /**
     * Rewrites the mapping quality of the given read in place when it equals
     * {@link #reassignMappingQualityFrom}; reads with any other mapping quality
     * are left untouched.
     *
     * @param rec the read to inspect and, if it matches, modify
     * @return always {@code false}; this method only modifies the read and never
     *         reports it as filtered out
     */
    public boolean filterOut(SAMRecord rec) {
        final int currentMappingQuality = rec.getMappingQuality();
        final boolean isTargetedQuality = currentMappingQuality == reassignMappingQualityFrom;

        if (isTargetedQuality) {
            rec.setMappingQuality(reassignMappingQualityTo);
        }

        // The result is constant: the read has at most been modified above.
        return false;
    }
}