incorporating skew check
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1078 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1339f3f3e3
commit
eb999f880a
|
|
@ -23,6 +23,10 @@ import java.util.*;
|
||||||
|
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
/** Reasons a candidate mutant can be rejected; the chosen constant's name() is appended to the "FAILED due to " output message. */
protected enum MutantFailureReason {
|
||||||
|
// NOTE(review): the only visible statement that would return this value (in readPileSkew) is commented out.
StrandImbalance,
|
||||||
|
// Returned by readPileSkew when at least one tested offset has a skew LOD above SKEW_LOD_THRESHOLD.
Misalignment
|
||||||
|
}
|
||||||
protected static class QualitySums {
|
protected static class QualitySums {
|
||||||
private int a = 0;
|
private int a = 0;
|
||||||
private int c = 0;
|
private int c = 0;
|
||||||
|
|
@ -74,10 +78,11 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName = "min_mutant_sum", required = false, doc = "threshold for sum of mutant allele quality scores")
|
@Argument(fullName = "min_mutant_sum", required = false, doc = "threshold for sum of mutant allele quality scores")
|
||||||
public int MIN_MUTANT_SUM = 100;
|
public int MIN_MUTANT_SUM = 100;
|
||||||
|
|
||||||
|
|
||||||
@Argument(fullName = "mode", required = false, doc="Mode of operation (detect, full)")
|
@Argument(fullName = "mode", required = false, doc="Mode of operation (detect, full)")
|
||||||
public String mode = "full";
|
public String mode = "full";
|
||||||
|
|
||||||
|
public float SKEW_LOD_THRESHOLD = 1.0f;
|
||||||
|
|
||||||
// @Argument(fullName = "output_failures", required = false, doc="produce output for failed sites")
|
// @Argument(fullName = "output_failures", required = false, doc="produce output for failed sites")
|
||||||
public boolean OUTPUT_FAILURES = true;
|
public boolean OUTPUT_FAILURES = true;
|
||||||
|
|
||||||
|
|
@ -125,9 +130,20 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLocusBases() {
|
public String getLocusBases() {
|
||||||
|
return getLocusBases(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getLocusBases(int locusOffset) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for(int i=0; i<reads.size(); i++) {
|
for(int i=0; i<reads.size(); i++) {
|
||||||
sb.append(reads.get(i).getReadString().charAt(offsets.get(i)));
|
SAMRecord read = reads.get(i);
|
||||||
|
int readOffset = offsets.get(i);
|
||||||
|
|
||||||
|
int offset = readOffset + locusOffset;
|
||||||
|
if (offset >= 0 && offset < read.getReadString().length()) {
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
sb.append(base);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -173,6 +189,71 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
return new double[]{refRef, altRef, altAlt};
|
return new double[]{refRef, altRef, altAlt};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Accumulates a GenotypeLikelihoods object from the base and base quality that each read in this
 * pile shows at (stored read offset + locusOffset).
 *
 * NOTE(review): no call to this method is visible in this diff, and the loop below is repeated
 * verbatim at the top of getNormalizedProbs(int) - confirm whether the two should share code.
 *
 * @param locusOffset shift added to each read's stored offset; 0 leaves the stored offset unchanged
 * @return the likelihoods object after every in-range base has been added to it
 */
private GenotypeLikelihoods getLikelihood(int locusOffset) {
|
||||||
|
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods();
|
||||||
|
|
||||||
|
|
||||||
|
for(int i=0; i<reads.size(); i++) {
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
int readOffset = offsets.get(i);
|
||||||
|
|
||||||
|
// The shift is applied in read coordinates (an index into the read string).
int offset = readOffset + locusOffset;
|
||||||
|
// Reads whose shifted index falls outside the read string contribute nothing.
if (offset >= 0 && offset < read.getReadString().length()) {
|
||||||
|
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
|
||||||
|
likelihoods.add(refBase, base, qual);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return likelihoods;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds genotype likelihoods from the bases this pile's reads show at (stored read offset +
 * locusOffset), converts the first 10 entries from log10 space to linear space, and scales them
 * so they sum to 1.
 *
 * NOTE(review): the array length 10 is hard-coded; presumably the number of entries in
 * GenotypeLikelihoods.likelihoods - confirm against that class.
 *
 * @param locusOffset shift added to each read's stored offset; 0 leaves the stored offset unchanged
 * @return a new 10-element array of the normalized linear-space values
 */
public double[] getNormalizedProbs(int locusOffset) {
|
||||||
|
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods();
|
||||||
|
|
||||||
|
|
||||||
|
// NOTE(review): this accumulation loop is identical to the one in getLikelihood(int).
for(int i=0; i<reads.size(); i++) {
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
int readOffset = offsets.get(i);
|
||||||
|
|
||||||
|
int offset = readOffset + locusOffset;
|
||||||
|
// Reads whose shifted index falls outside the read string contribute nothing.
if (offset >= 0 && offset < read.getReadString().length()) {
|
||||||
|
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
|
||||||
|
likelihoods.add(refBase, base, qual);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// The stored values are treated as log10: each one is used as an exponent of 10 below.
double[] logLikelihood = likelihoods.likelihoods;
|
||||||
|
double[] nonLogLikelihood = new double[10];
|
||||||
|
double sum = 0;
|
||||||
|
for(int i=0; i<10; i++) {
|
||||||
|
// NOTE(review): Math.pow(10, x) underflows to 0.0 for very negative x; if all ten entries
// underflow, sum stays 0 and the division below yields NaN - confirm whether that can occur.
nonLogLikelihood[i] = Math.pow(10, logLikelihood[i]);
|
||||||
|
sum += nonLogLikelihood[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
double[] normalizedProbs = new double[10];
|
||||||
|
for(int i=0; i<10; i++) {
|
||||||
|
normalizedProbs[i] = nonLogLikelihood[i] / sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// quick sanity check (disabled): re-sums the normalized values and prints the total
|
||||||
|
// sum=0;
|
||||||
|
// for(int i=0; i<10; i++) {
|
||||||
|
// sum += normalizedProbs[i];
|
||||||
|
// }
|
||||||
|
// System.out.println("normalized probs = " + sum);
|
||||||
|
|
||||||
|
|
||||||
|
return normalizedProbs;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
|
@ -272,6 +353,13 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// (ii) the quality score sum for the mutant base in the normal must be < 50 and the
|
||||||
|
// LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3.
|
||||||
|
double normalLod = normalReadPile.getRefVsAlt(altAllele);
|
||||||
|
if ( normalReadPile.qualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD_THRESHOLD) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle
|
// make sure we've seen at least 1 obs of the alternate allele within 20bp of the read-middle
|
||||||
boolean failedMidpointCheck = midp.get(altAllele) > 20;
|
boolean failedMidpointCheck = midp.get(altAllele) > 20;
|
||||||
// if (failedMidpointCheck) {
|
// if (failedMidpointCheck) {
|
||||||
|
|
@ -324,14 +412,17 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: using the original pile here since often these artifacts will be supported
|
// // TODO: using the original pile here since often these artifacts will be supported
|
||||||
// by those reads that get thrown out! Maybe that means we don't need the noise filter...
|
// // by those reads that get thrown out! Maybe that means we don't need the noise filter...
|
||||||
boolean shouldDisalign =
|
// boolean shouldDisalign =
|
||||||
disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
|
// disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart);
|
||||||
|
MutantFailureReason failureReason =
|
||||||
|
readPileSkew(t2, altAllele, StringUtil.bytesToString(refSeq.getBases()), refStart);
|
||||||
|
|
||||||
if (mode.equals("full") && shouldDisalign) {
|
|
||||||
|
if (mode.equals("full") && failureReason != null) {
|
||||||
if (OUTPUT_FAILURES) {
|
if (OUTPUT_FAILURES) {
|
||||||
String msg = "FAILED due to DISALIGNMENT TEST.";
|
String msg = "FAILED due to " + failureReason.name();
|
||||||
|
|
||||||
out.println(
|
out.println(
|
||||||
context.getContig() + "\t" +
|
context.getContig() + "\t" +
|
||||||
|
|
@ -349,12 +440,6 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// (ii) the quality score sum for the mutant base in the normal must be < 50 and the
|
|
||||||
// LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3.
|
|
||||||
double normalLod = normalReadPile.getRefVsAlt(altAllele);
|
|
||||||
if ( normalReadPile.qualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD_THRESHOLD) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we're still here... we've got a somatic mutation! Output the results
|
// if we're still here... we've got a somatic mutation! Output the results
|
||||||
// and stop looking for mutants!
|
// and stop looking for mutants!
|
||||||
|
|
@ -402,6 +487,73 @@ public class SomaticMutationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Splits the pile into reads that show mutantAllele at their stored offset and reads that do not,
 * then compares the two sub-piles at read-offset shifts from -76 to 75 (skipping -1, 0 and 1).
 * At each shift the normalized genotype probabilities of the two sub-piles are multiplied
 * element-wise and summed into J, and the shift is recorded when log10((1-J)/J) exceeds
 * SKEW_LOD_THRESHOLD.
 *
 * NOTE(review): the parameters reference and leftmostIndex are not read anywhere in this body.
 *
 * @param pile          reads and per-read offsets to split
 * @param mutantAllele  base that assigns a read to the mutant sub-pile
 * @param reference     unused here
 * @param leftmostIndex unused here
 * @return MutantFailureReason.Misalignment if any shift was recorded, otherwise null
 */
private MutantFailureReason readPileSkew(LocusReadPile pile, char mutantAllele, String reference, long leftmostIndex) {
|
||||||
|
// first split into two piles, those supporting the mutant and those not
|
||||||
|
// NOTE(review): both sub-piles are constructed with mutantAllele, whereas filterHighMismatchScoreReads
// passes pile.refBase to the same constructor - confirm which base is intended here.
LocusReadPile mutantPile = new LocusReadPile(mutantAllele);
|
||||||
|
LocusReadPile otherPile = new LocusReadPile(mutantAllele);
|
||||||
|
|
||||||
|
|
||||||
|
for (int i=0; i<pile.reads.size(); i++) {
|
||||||
|
SAMRecord read = pile.reads.get(i);
|
||||||
|
int offset = pile.offsets.get(i);
|
||||||
|
|
||||||
|
if (read.getReadString().charAt(offset) == mutantAllele) {
|
||||||
|
mutantPile.add(read, offset);
|
||||||
|
} else {
|
||||||
|
otherPile.add(read, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now we can ask questions about the two piles.
|
||||||
|
|
||||||
|
// e.g. is the mutant allele seen in both strands?
|
||||||
|
boolean seenOnPositive = false;
|
||||||
|
boolean seenOnNegative = false;
|
||||||
|
for(SAMRecord read : mutantPile.reads) {
|
||||||
|
if (read.getReadNegativeStrandFlag()) { seenOnNegative = true; } else { seenOnPositive = true; }
|
||||||
|
}
|
||||||
|
// NOTE(review): the strand check currently has no effect - the only statement in this branch is commented out.
if (!seenOnPositive || !seenOnNegative) {
|
||||||
|
// return MutantFailureReason.StrandImbalance;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//chr17:4979257
|
||||||
|
// are the two alleles distributed differently
|
||||||
|
//fixme: calculate this range properly
|
||||||
|
//fixme: this seems to degrade as you lose reads?
|
||||||
|
//fixme: what should the threshold be here?
|
||||||
|
// Shifts whose skew LOD exceeded the threshold, keyed by shift; only its size is consulted below.
SortedMap<Integer, Double> skewLodOffsets = new TreeMap<Integer, Double>();
|
||||||
|
|
||||||
|
// NOTE(review): the -76..75 window is hard-coded (see the fixme above); presumably tied to read length - confirm.
for(int offset=-76; offset<76; offset++) {
|
||||||
|
// allow for doubletons
|
||||||
|
if (offset >= -1 && offset <= 1 ) { continue; }
|
||||||
|
|
||||||
|
double[] mutantNormProbs = mutantPile.getNormalizedProbs(offset);
|
||||||
|
double[] otherNormProbs = otherPile.getNormalizedProbs(offset);
|
||||||
|
|
||||||
|
// J is the sum over the 10 entries of the product of the two piles' normalized probabilities.
double J = 0;
|
||||||
|
for(int i=0; i<10; i++) {
|
||||||
|
J += mutantNormProbs[i] * otherNormProbs[i];
|
||||||
|
}
|
||||||
|
// NOTE(review): J == 0 makes this +Infinity (always above the threshold); a NaN J makes it NaN (never above).
double skewLod = Math.log10( (1-J) / J);
|
||||||
|
|
||||||
|
if (skewLod > SKEW_LOD_THRESHOLD) {
|
||||||
|
// System.out.println( "Offset: " + offset +
|
||||||
|
// " mutant_reads: " + mutantPile.getLocusBases(offset).length() +
|
||||||
|
// " other_reads: " + otherPile.getLocusBases(offset).length() +
|
||||||
|
// " skewLod:" + skewLod );
|
||||||
|
|
||||||
|
skewLodOffsets.put(offset, skewLod);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A single skewed shift is enough to fail the candidate.
if (skewLodOffsets.size() > 0) {
|
||||||
|
return MutantFailureReason.Misalignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
int MAX_READ_MISMATCH_QUALITY_SCORE_SUM = 100;
|
int MAX_READ_MISMATCH_QUALITY_SCORE_SUM = 100;
|
||||||
private LocusReadPile filterHighMismatchScoreReads(LocusReadPile pile, String reference, long leftmostIndex) {
|
private LocusReadPile filterHighMismatchScoreReads(LocusReadPile pile, String reference, long leftmostIndex) {
|
||||||
LocusReadPile newPile = new LocusReadPile(pile.refBase);
|
LocusReadPile newPile = new LocusReadPile(pile.refBase);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue