From 7527f950d172f9d25bd4b7af2ba4c25d34896af4 Mon Sep 17 00:00:00 2001 From: kiran Date: Mon, 10 May 2010 14:49:38 +0000 Subject: [PATCH] Computes the quality score distribution per readgroup (one column per readgroup) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3335 348d0f76-0448-11de-a6fe-93d51630548a --- .../diagnostics/QualityScoreDistribution.java | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/QualityScoreDistribution.java diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/QualityScoreDistribution.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/QualityScoreDistribution.java new file mode 100755 index 000000000..cce5774fc --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/QualityScoreDistribution.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.playground.gatk.walkers.diagnostics; + +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import net.sf.samtools.SAMRecord; + +import java.util.*; + +/** + * Compute quality score distribution + */ +public class QualityScoreDistribution extends LocusWalker { + private HashMap qualDists; + + public void initialize() { + qualDists = new HashMap(); + + qualDists.put("all", new long[QualityUtils.MAX_QUAL_SCORE]); + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + List reads = context.getReads(); + List offsets = context.getOffsets(); + + for (int i = 0; i < reads.size(); i++) { + byte qual = reads.get(i).getBaseQualities()[offsets.get(i)]; + String name = reads.get(i).getReadGroup().getReadGroupId(); + + if (!qualDists.containsKey(name)) { + qualDists.put(name, new long[QualityUtils.MAX_QUAL_SCORE]); + } + + qualDists.get(name)[qual]++; + qualDists.get("all")[qual]++; + } + + return null; + } + + public Integer reduceInit() { + return null; + } + + public Integer reduce(Integer value, Integer sum) { + return null; + } + + public void onTraversalDone(Integer result) { + Set names = qualDists.keySet(); + HashMap norms = new HashMap(); + + for (String name : names) { + long norm = 0; + for (int qual = 0; qual < QualityUtils.MAX_QUAL_SCORE; qual++) { + norm += qualDists.get(name)[qual]; + } + + norms.put(name, norm); + } + + out.printf("Q"); + for (String name : names) { + out.printf("\t%s", name); + } + out.println(); + + for (int qual = 0; qual < QualityUtils.MAX_QUAL_SCORE; qual++) { + out.printf("%d", qual); + + for (String name : names) { + out.printf("\t%f", ((float) qualDists.get(name)[qual])/((float) norms.get(name))); + } + + out.println(); + } + } +} \ No newline at end of file