From 107b5d73b5b87a39c45d70debe974683dbcffe4f Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 2 Jun 2009 21:23:56 +0000 Subject: [PATCH] The flagStatReadWalker generates the exact same statistical output as the samtools flagstat command, so the two outputs can be diff'ed. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@883 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/FlagStatReadWalker.java | 179 ++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/FlagStatReadWalker.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/FlagStatReadWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/FlagStatReadWalker.java new file mode 100644 index 000000000..03fe72bbc --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/FlagStatReadWalker.java @@ -0,0 +1,179 @@ +package org.broadinstitute.sting.playground.gatk.walkers; + +import net.sf.samtools.SAMRecord; + +import java.text.DecimalFormat; +import java.text.NumberFormat; + +import org.broadinstitute.sting.gatk.walkers.ReadWalker; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + *

+ * Class FlagStatReadWalker + *

+ * This walker mirrors that stats that are generated by the flagstat + * command of samtools. + */ +public class FlagStatReadWalker extends ReadWalker { + + + // what comes out of the flagstat + class FlagStat { + int readCount = 0; + int QC_failure = 0; + int duplicates = 0; + int mapped = 0; + int paired_in_sequencing = 0; + int read1 = 0; + int read2 = 0; + int properly_paired = 0; + int with_itself_and_mate_mapped = 0; + int singletons = 0; + int with_mate_mapped_to_a_different_chr = 0; + int with_mate_mapped_to_a_different_chr_maq_greaterequal_than_5 = 0; + + public String toString() { + String ret = ""; + StringBuilder builder = new StringBuilder(ret); + NumberFormat percentFormatter = new DecimalFormat("#0.00"); + builder.append(readCount); + builder.append(" in total\n"); + + builder.append(QC_failure); + builder.append(" QC failure\n"); + + + builder.append(duplicates); + builder.append(" duplicates\n"); + + builder.append(mapped); + builder.append(" mapped ("); + builder.append(percentFormatter.format(( (float)mapped / (float)readCount ) * 100.0)); + builder.append("%)\n"); + + builder.append(paired_in_sequencing); + builder.append(" paired in sequencing\n"); + + + builder.append(read1); + builder.append(" read1\n"); + + builder.append(read2); + builder.append(" read2\n"); + + builder.append(properly_paired); + builder.append(" properly paired ("); + builder.append(percentFormatter.format(( (float)properly_paired / (float)readCount ) * 100.0)); + builder.append("%)\n"); + + + builder.append(with_itself_and_mate_mapped); + builder.append(" with itself and mate mapped\n"); + + builder.append(singletons); + builder.append(" singletons ("); + builder.append(percentFormatter.format(( (float)singletons / (float)readCount ) * 100.0)); + builder.append("%)\n"); + + + builder.append(with_mate_mapped_to_a_different_chr); + builder.append(" with mate mapped to a different chr\n"); + + builder.append(with_mate_mapped_to_a_different_chr_maq_greaterequal_than_5); + builder.append(" with mate mapped to a different chr (mapQ>=5)"); + + return builder.toString(); + } + + } + + + private FlagStat myStat = new FlagStat(); + + public Integer map( char[] ref, SAMRecord read ) { + myStat.readCount++; + if (read.getReadFailsVendorQualityCheckFlag()) { + myStat.QC_failure++; + } + if (read.getDuplicateReadFlag()) { + myStat.duplicates++; + } + if (read.getReferenceIndex() >= 0) { + myStat.mapped++; + } + if (read.getReadPairedFlag()) { + myStat.paired_in_sequencing++; + + if (read.getSecondOfPairFlag()) { + myStat.read2++; + } else if (read.getReadPairedFlag()) { + myStat.read1++; + } + if (read.getProperPairFlag()) { + + myStat.properly_paired++; + } + if (!read.getMateUnmappedFlag() && read.getReferenceIndex() >= 0) { + myStat.with_itself_and_mate_mapped++; + } + if (read.getMateUnmappedFlag()) { + myStat.singletons++; + } + } + if (read.getReferenceIndex() >= 0 && read.getMateReferenceIndex() >= 0 && read.getReferenceIndex() != read.getMateReferenceIndex()) { + myStat.with_mate_mapped_to_a_different_chr++; + + if (read.getMappingQuality() >= 5) { + myStat.with_mate_mapped_to_a_different_chr_maq_greaterequal_than_5++; + } + } + return 1; + + } + + public Integer reduceInit + () { + return 0; + } + + public Integer reduce + ( Integer + value, Integer + sum ) { + return value + sum; + } + + public void onTraversalDone + ( Integer + result ) { + //out.println("[REDUCE RESULT] Traversal result is: " + result); + out.println(myStat.toString()); + } +} \ No newline at end of file