From 98718d0faa5beb3ada59516b6f9257e841330ee2 Mon Sep 17 00:00:00 2001 From: kiran Date: Mon, 10 May 2010 14:50:22 +0000 Subject: [PATCH] Computes the error rate per cycle git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3336 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/ErrorRatePerReadPosition.java | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/ErrorRatePerReadPosition.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ErrorRatePerReadPosition.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ErrorRatePerReadPosition.java new file mode 100755 index 000000000..9e664e0e8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ErrorRatePerReadPosition.java @@ -0,0 +1,153 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.utils.BaseUtils; + +import java.util.List; +import java.util.HashMap; +import java.util.HashSet; + +import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMReadGroupRecord; + +public class ErrorRatePerReadPosition extends LocusWalker { + private HashMap mismatches; + private HashMap counts; + private HashMap quals; + private HashMap> readLengthsPerReadGroup; + private HashSet readLengths; + private int readLength = 10000; + + + public void initialize() { + mismatches = new HashMap(); + counts = new HashMap(); + quals = new HashMap(); + readLengthsPerReadGroup = new HashMap>(); + readLengths = new HashSet(); + + for (SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups()) { + int[] mm = new int[readLength]; + int[] cm = new int[readLength]; + int[] qm = new int[readLength]; + + mismatches.put(rg.getReadGroupId(), mm); + counts.put(rg.getReadGroupId(), cm); + quals.put(rg.getReadGroupId(), qm); + } + } + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + List offsets = context.getOffsets(); + List reads = context.getReads(); + + for (int i = 0; i < offsets.size(); i++) { + int offset = offsets.get(i); + char readBase = reads.get(i).getReadString().charAt(offset); + + int refIndex = ref.getBaseIndex(); + int readIndex = BaseUtils.simpleBaseToBaseIndex(readBase); + + + if (!reads.get(i).getReadNegativeStrandFlag() && (!reads.get(i).getReadPairedFlag() || reads.get(i).getFirstOfPairFlag())) { + String keyName = reads.get(i).getReadGroup().getReadGroupId(); + + //out.printf("%d %d%n", refIndex, readIndex); + + mismatches.get(keyName)[offset] += (refIndex != readIndex) ? 1 : 0; + counts.get(keyName)[offset]++; + quals.get(keyName)[offset] += reads.get(i).getBaseQualities()[offset]; + + //out.println(reads.get(i).getBaseQualities()[offset]); + + int readLength = reads.get(i).getReadLength(); + if (!readLengthsPerReadGroup.containsKey(keyName)) { + readLengthsPerReadGroup.put(keyName, new HashSet()); + } + + readLengthsPerReadGroup.get(keyName).add(readLength); + readLengths.add(readLength); + } + } + + return null; + } + + public Integer reduceInit() { + return null; + } + + public Integer reduce(Integer value, Integer sum) { + return null; + } + + public void onTraversalDone(Integer sum) { + String[] rgs = mismatches.keySet().toArray(new String[1]); + + out.printf("position"); + for (String rg : rgs) { out.printf("\t%s", rg); } + //out.printf("\tmean\tsd\tmin\tmax"); + + //for (int readLength : readLengths) { + // out.printf("\t%dreadLength", readLength); + //} + + out.println(); + + for (int i = 0; i < readLength; i++) { + boolean print = false; + String row = ""; + + row += String.format("%d", i); + + double rsum = 0.0; + double min = 1.00; + double max = 0.00; + + for (String rg : rgs) { + double value = ((double) mismatches.get(rg)[i])/((double) counts.get(rg)[i]); + //double value = ((double) quals.get(rg)[i])/((double) counts.get(rg)[i]); + + if (Double.isInfinite(value) || Double.isNaN(value)) { + value = 0.0; + } else { + print = true; + } + + row += String.format("\t%f", value); + + rsum += value; + if (value > max) { max = value; } + if (value < min) { min = value; } + } + + double mean = rsum/rgs.length; + + double squareDeviationSum = 0.0; + for (String rg : rgs) { + double value = ((double) mismatches.get(rg)[i])/((double) counts.get(rg)[i]); + + if (Double.isInfinite(value) || Double.isNaN(value)) { + value = 0.0; + } else { + print = true; + } + + squareDeviationSum += Math.pow(value - mean, 2.0); + } + + double sd = Math.sqrt(squareDeviationSum/rgs.length); + + //row += String.format("\t%f\t%f\t%f\t%f", mean, sd, min, max); + + row += String.format("%n"); + + if (print) { + out.print(row); + } + } + } +}