Improved, public version of ErrorRateByCycle
-- A cleaner table output (molten). For those interested in seeing how this can be done with GATKReports look here for a nice clean example -- Integration tests -- Minor improvements to GATKReportTable with methods to getPrimaryKeys
This commit is contained in:
parent
a6a8fc0521
commit
0376d73ece
|
|
@ -296,6 +296,10 @@ public class GATKReportTable {
|
|||
return primaryKeyColumn.contains(primaryKey);
|
||||
}
|
||||
|
||||
public Collection<Object> getPrimaryKeys() {
|
||||
return Collections.unmodifiableCollection(primaryKeyColumn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value for a given position in the table
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,162 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Computes the read error rate per position in read (in the original 5'->3' orientation that the read had coming off the machine)
|
||||
*
|
||||
* Emits a GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate for each read
|
||||
* group in the input BAMs FOR ONLY THE FIRST OF PAIR READS.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* Any number of BAM files
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* GATKReport containing readgroup, cycle, mismatches, counts, qual, and error rate.
|
||||
*
|
||||
* For example, running this tool on the NA12878 data sets:
|
||||
*
|
||||
* <pre>
|
||||
* ##:GATKReport.v0.2 ErrorRatePerCycle : The error rate per sequenced position in the reads
|
||||
* readgroup cycle mismatches counts qual errorrate
|
||||
* 20FUK.1 0 80 23368 25 3.47e-03
|
||||
* 20FUK.1 1 40 23433 28 1.75e-03
|
||||
* 20FUK.1 2 36 23453 28 1.58e-03
|
||||
* 20FUK.1 3 26 23476 29 1.15e-03
|
||||
* 20FUK.1 4 32 23495 29 1.40e-03
|
||||
* up to 101 cycles
|
||||
* 20FUK.2 0 77 20886 24 3.73e-03
|
||||
* 20FUK.2 1 28 20920 29 1.39e-03
|
||||
* 20FUK.2 2 24 20931 29 1.19e-03
|
||||
* 20FUK.2 3 30 20940 28 1.48e-03
|
||||
* 20FUK.2 4 25 20948 29 1.24e-03
|
||||
* up to 101 cycles
|
||||
* 20FUK.3 0 78 22038 24 3.58e-03
|
||||
* 20FUK.3 1 40 22091 27 1.86e-03
|
||||
* 20FUK.3 2 23 22108 30 1.09e-03
|
||||
* 20FUK.3 3 36 22126 28 1.67e-03
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T ErrorRatePerCycle
|
||||
* -I bundle/current/b37/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam
|
||||
* -R bundle/current/b37/human_g1k_v37.fasta
|
||||
* -o example.gatkreport.txt
|
||||
* </pre>
|
||||
*
|
||||
* @author Kiran Garimella, Mark DePristo
|
||||
*/
|
||||
public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
|
||||
@Output PrintStream out;
|
||||
@Argument(fullName="min_base_quality_score", shortName="mbq", doc="Minimum base quality required to consider a base for calling", required=false)
|
||||
public Integer MIN_BASE_QUAL = 0;
|
||||
@Argument(fullName="min_mapping_quality_score", shortName="mmq", doc="Minimum read mapping quality required to consider a read for calling", required=false)
|
||||
public Integer MIN_MAPPING_QUAL = 20;
|
||||
|
||||
private GATKReport report;
|
||||
private GATKReportTable table;
|
||||
private final static String reportName = "ErrorRatePerCycle";
|
||||
private final static String reportDescription = "The error rate per sequenced position in the reads";
|
||||
|
||||
/**
|
||||
* Allows us to use multiple records for the key (read group x cycle)
|
||||
*/
|
||||
private static class TableKey implements Comparable<TableKey> {
|
||||
final String readGroup;
|
||||
final int cycle;
|
||||
|
||||
private TableKey(final String readGroup, final int cycle) {
|
||||
this.readGroup = readGroup;
|
||||
this.cycle = cycle;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final TableKey tableKey) {
|
||||
final int scmp = readGroup.compareTo(tableKey.readGroup);
|
||||
if ( scmp == 0 )
|
||||
return Integer.valueOf(cycle).compareTo(tableKey.cycle);
|
||||
else
|
||||
return scmp;
|
||||
}
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
report = new GATKReport();
|
||||
report.addTable(reportName, reportDescription);
|
||||
table = report.getTable(reportName);
|
||||
table.addPrimaryKey("key", false);
|
||||
table.addColumn("readgroup", 0);
|
||||
table.addColumn("cycle", 0);
|
||||
table.addColumn("mismatches", 0);
|
||||
table.addColumn("counts", 0);
|
||||
table.addColumn("qual", 0);
|
||||
table.addColumn("errorrate", 0.0f, "%.2e");
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
for ( final PileupElement p : context.getBasePileup() ) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
final int offset = p.getOffset();
|
||||
final boolean firstOfPair = ! read.getReadPairedFlag() || read.getFirstOfPairFlag();
|
||||
|
||||
if ( firstOfPair && read.getMappingQuality() >= MIN_MAPPING_QUAL && p.getQual() >= MIN_BASE_QUAL ) {
|
||||
final byte readBase = p.getBase();
|
||||
final byte refBase = ref.getBase();
|
||||
final int cycle = offset;
|
||||
|
||||
if ( BaseUtils.isRegularBase(readBase) && BaseUtils.isRegularBase(refBase) ) {
|
||||
final TableKey key = new TableKey(read.getReadGroup().getReadGroupId(), cycle);
|
||||
|
||||
if ( ! table.containsKey(key) ) {
|
||||
table.set(key, "cycle", cycle);
|
||||
table.set(key, "readgroup", read.getReadGroup().getReadGroupId());
|
||||
}
|
||||
|
||||
table.increment(key, "counts");
|
||||
if (readBase != refBase)
|
||||
table.increment(key, "mismatches");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return null; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) { return null; }
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
for ( final Object key : table.getPrimaryKeys() ) {
|
||||
final int mismatches = (Integer)table.get(key, "mismatches");
|
||||
final int count = (Integer)table.get(key, "counts");
|
||||
final double errorRate = (mismatches + 1) / (1.0*(count + 1));
|
||||
final int qual = QualityUtils.probToQual(1-errorRate, 0.0);
|
||||
table.set(key, "qual", qual);
|
||||
table.set(key, "errorrate", errorRate);
|
||||
}
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
}
|
||||
|
|
@ -94,9 +94,6 @@ import java.util.Map;
|
|||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
|
||||
|
||||
|
||||
public class ReadGroupProperties extends ReadWalker<Integer, Integer> {
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void basicTest() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("0cc212ecb6df300e321784039ff29f13"));
|
||||
executeTest("ErrorRatePerCycle:", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue