Computes the error rate per cycle
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3336 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7527f950d1
commit
98718d0faa
|
|
@ -0,0 +1,153 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
public class ErrorRatePerReadPosition extends LocusWalker<Integer, Integer> {
|
||||
private HashMap<String, int[]> mismatches;
|
||||
private HashMap<String, int[]> counts;
|
||||
private HashMap<String, int[]> quals;
|
||||
private HashMap<String, HashSet<Integer>> readLengthsPerReadGroup;
|
||||
private HashSet<Integer> readLengths;
|
||||
private int readLength = 10000;
|
||||
|
||||
|
||||
public void initialize() {
|
||||
mismatches = new HashMap<String, int[]>();
|
||||
counts = new HashMap<String, int[]>();
|
||||
quals = new HashMap<String, int[]>();
|
||||
readLengthsPerReadGroup = new HashMap<String, HashSet<Integer>>();
|
||||
readLengths = new HashSet<Integer>();
|
||||
|
||||
for (SAMReadGroupRecord rg : this.getToolkit().getSAMFileHeader().getReadGroups()) {
|
||||
int[] mm = new int[readLength];
|
||||
int[] cm = new int[readLength];
|
||||
int[] qm = new int[readLength];
|
||||
|
||||
mismatches.put(rg.getReadGroupId(), mm);
|
||||
counts.put(rg.getReadGroupId(), cm);
|
||||
quals.put(rg.getReadGroupId(), qm);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
|
||||
for (int i = 0; i < offsets.size(); i++) {
|
||||
int offset = offsets.get(i);
|
||||
char readBase = reads.get(i).getReadString().charAt(offset);
|
||||
|
||||
int refIndex = ref.getBaseIndex();
|
||||
int readIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
|
||||
|
||||
|
||||
if (!reads.get(i).getReadNegativeStrandFlag() && (!reads.get(i).getReadPairedFlag() || reads.get(i).getFirstOfPairFlag())) {
|
||||
String keyName = reads.get(i).getReadGroup().getReadGroupId();
|
||||
|
||||
//out.printf("%d %d%n", refIndex, readIndex);
|
||||
|
||||
mismatches.get(keyName)[offset] += (refIndex != readIndex) ? 1 : 0;
|
||||
counts.get(keyName)[offset]++;
|
||||
quals.get(keyName)[offset] += reads.get(i).getBaseQualities()[offset];
|
||||
|
||||
//out.println(reads.get(i).getBaseQualities()[offset]);
|
||||
|
||||
int readLength = reads.get(i).getReadLength();
|
||||
if (!readLengthsPerReadGroup.containsKey(keyName)) {
|
||||
readLengthsPerReadGroup.put(keyName, new HashSet<Integer>());
|
||||
}
|
||||
|
||||
readLengthsPerReadGroup.get(keyName).add(readLength);
|
||||
readLengths.add(readLength);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
String[] rgs = mismatches.keySet().toArray(new String[1]);
|
||||
|
||||
out.printf("position");
|
||||
for (String rg : rgs) { out.printf("\t%s", rg); }
|
||||
//out.printf("\tmean\tsd\tmin\tmax");
|
||||
|
||||
//for (int readLength : readLengths) {
|
||||
// out.printf("\t%dreadLength", readLength);
|
||||
//}
|
||||
|
||||
out.println();
|
||||
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
boolean print = false;
|
||||
String row = "";
|
||||
|
||||
row += String.format("%d", i);
|
||||
|
||||
double rsum = 0.0;
|
||||
double min = 1.00;
|
||||
double max = 0.00;
|
||||
|
||||
for (String rg : rgs) {
|
||||
double value = ((double) mismatches.get(rg)[i])/((double) counts.get(rg)[i]);
|
||||
//double value = ((double) quals.get(rg)[i])/((double) counts.get(rg)[i]);
|
||||
|
||||
if (Double.isInfinite(value) || Double.isNaN(value)) {
|
||||
value = 0.0;
|
||||
} else {
|
||||
print = true;
|
||||
}
|
||||
|
||||
row += String.format("\t%f", value);
|
||||
|
||||
rsum += value;
|
||||
if (value > max) { max = value; }
|
||||
if (value < min) { min = value; }
|
||||
}
|
||||
|
||||
double mean = rsum/rgs.length;
|
||||
|
||||
double squareDeviationSum = 0.0;
|
||||
for (String rg : rgs) {
|
||||
double value = ((double) mismatches.get(rg)[i])/((double) counts.get(rg)[i]);
|
||||
|
||||
if (Double.isInfinite(value) || Double.isNaN(value)) {
|
||||
value = 0.0;
|
||||
} else {
|
||||
print = true;
|
||||
}
|
||||
|
||||
squareDeviationSum += Math.pow(value - mean, 2.0);
|
||||
}
|
||||
|
||||
double sd = Math.sqrt(squareDeviationSum/rgs.length);
|
||||
|
||||
//row += String.format("\t%f\t%f\t%f\t%f", mean, sd, min, max);
|
||||
|
||||
row += String.format("%n");
|
||||
|
||||
if (print) {
|
||||
out.print(row);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue