Computes empirical confusion matrices, optionally with up to five bases of preceding context
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2621 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
12453fa163
commit
dd6d5aadf9
|
|
@ -0,0 +1,143 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes empirical base confusion matrix, and optionally computes
|
||||||
|
* these matrices with up to five bases of preceding context
|
||||||
|
*/
|
||||||
|
@Reference(window=@Window(start=-5,stop=5))
|
||||||
|
public class ComputeConfusionMatrix extends LocusWalker<Integer, Integer> {
|
||||||
|
@Argument(fullName="minimumDepth", shortName="minDepth", doc="Require locus pileup to have specified minimum depth", required=false)
|
||||||
|
public Integer MIN_DEPTH = 10;
|
||||||
|
|
||||||
|
@Argument(fullName="maximumDepth", shortName="maxDepth", doc="Require locus pileup to have specified maximum depth", required=false)
|
||||||
|
public Integer MAX_DEPTH = 100;
|
||||||
|
|
||||||
|
@Argument(fullName="contextWindowSize", shortName="window", doc="Size of context window", required=false)
|
||||||
|
public Integer WINDOW_SIZE = 0;
|
||||||
|
|
||||||
|
private Hashtable<String, Integer> confusionCounts = new Hashtable<String, Integer>();
|
||||||
|
|
||||||
|
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
int pileupSize = context.size();
|
||||||
|
|
||||||
|
int numAlts = 0;
|
||||||
|
int[] baseCounts = context.getBasePileup().getBaseCounts();
|
||||||
|
for (int baseIndex = 0; baseIndex < baseCounts.length; baseIndex++) {
|
||||||
|
if (baseIndex != ref.getBaseIndex()) {
|
||||||
|
numAlts += baseCounts[baseIndex];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
|
pileupSize >= MIN_DEPTH && // don't process regions without a reasonable pileup
|
||||||
|
pileupSize < MAX_DEPTH && // don't process suspiciously overcovered regions
|
||||||
|
ref.getBases().length % 2 == 1 && // don't process regions that don't have a full context window
|
||||||
|
numAlts == 1 && // don't process regions that have more than one mismatching base
|
||||||
|
ref.getBaseIndex() >= 0 // don't process a locus with an ambiguous reference base
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
int windowLength = ref.getBases().length;
|
||||||
|
int windowCenter = (windowLength - 1)/2;
|
||||||
|
|
||||||
|
String fwRefBases = new String(ref.getBases());
|
||||||
|
String fwRefBase = String.format("%c", ref.getBase());
|
||||||
|
String fwWindowLeft = fwRefBases.substring(windowCenter - WINDOW_SIZE, windowCenter);
|
||||||
|
|
||||||
|
//String rcRefBases = new String(BaseUtils.simpleReverseComplement(ref.getBases()));
|
||||||
|
//String rcRefBase = String.format("%c", BaseUtils.simpleComplement(ref.getBase()));
|
||||||
|
//String rcWindowRight = rcRefBases.substring(windowCenter + 1, windowCenter + 1 + WINDOW_SIZE);
|
||||||
|
|
||||||
|
int[] baseCounts = context.getBasePileup().getBaseCounts();
|
||||||
|
int altBaseIndex = -1;
|
||||||
|
for (int baseIndex = 0; baseIndex < 4; baseIndex++) {
|
||||||
|
if (baseCounts[baseIndex] == 1) {
|
||||||
|
altBaseIndex = baseIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String fwAltBase = String.format("%c", BaseUtils.baseIndexToSimpleBase(altBaseIndex));
|
||||||
|
//String rcAltBase = BaseUtils.simpleComplement(fwAltBase);
|
||||||
|
|
||||||
|
for (int readIndex = 0; readIndex < context.getReads().size(); readIndex++) {
|
||||||
|
SAMRecord read = context.getReads().get(readIndex);
|
||||||
|
int offset = context.getOffsets().get(readIndex);
|
||||||
|
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
|
||||||
|
|
||||||
|
if (baseIndex == altBaseIndex) {
|
||||||
|
if (read.getReadNegativeStrandFlag()) {
|
||||||
|
//incrementConfusionCounts(rcWindowRight, rcRefBase, rcAltBase);
|
||||||
|
} else {
|
||||||
|
incrementConfusionCounts(fwWindowLeft, fwAltBase, fwRefBase);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void incrementConfusionCounts(String context, String altBase, String refBase) {
|
||||||
|
String key = String.format("%s:%s:%s", context, altBase, refBase);
|
||||||
|
|
||||||
|
Integer counts = confusionCounts.get(key);
|
||||||
|
if (counts == null) { counts = 0; }
|
||||||
|
|
||||||
|
confusionCounts.put(key, counts + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduceInit() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer reduce(Integer value, Integer sum) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Integer result) {
|
||||||
|
String[] keys = confusionCounts.keySet().toArray(new String[0]);
|
||||||
|
Arrays.sort(keys);
|
||||||
|
|
||||||
|
HashMap<String, Integer> contextualNorms = new HashMap<String, Integer>();
|
||||||
|
for (String key : keys) {
|
||||||
|
String[] fields = key.split(":");
|
||||||
|
|
||||||
|
String contextualKey = String.format("%s:%s", fields[0], fields[1]);
|
||||||
|
Integer contextualCount = contextualNorms.get(contextualKey);
|
||||||
|
if (contextualCount == null) { contextualCount = 0; }
|
||||||
|
contextualNorms.put(contextualKey, contextualCount + confusionCounts.get(key));
|
||||||
|
}
|
||||||
|
|
||||||
|
out.printf("confusionMatrix\tcontext\talt\tref\tcontextualCounts\tcontextualPercentage\n");
|
||||||
|
for (String key : keys) {
|
||||||
|
String[] fields = key.split(":");
|
||||||
|
String contextualKey = String.format("%s:%s", fields[0], fields[1]);
|
||||||
|
|
||||||
|
out.printf(
|
||||||
|
"confusionMatrix\t%s\t%s\t%s\t%d\t%d\t%f\n",
|
||||||
|
fields[0],
|
||||||
|
fields[1],
|
||||||
|
fields[2],
|
||||||
|
confusionCounts.get(key),
|
||||||
|
contextualNorms.get(contextualKey),
|
||||||
|
confusionCounts.get(key)/((float) contextualNorms.get(contextualKey))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue