From 3b24264c2b8de18a0d9c00a79f55ec3df9107ece Mon Sep 17 00:00:00 2001 From: kcibul Date: Thu, 25 Jun 2009 16:01:07 +0000 Subject: [PATCH] incorporating skew check, further output of metrics git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1094 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/SomaticMutationWalker.java | 86 +++++++++++++------ 1 file changed, 58 insertions(+), 28 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java index 4b31bafa6..d40c55711 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java @@ -22,7 +22,7 @@ import java.io.IOException; import java.util.*; @By(DataSource.REFERENCE) -public class SomaticMutationWalker extends LocusWalker { +public class SomaticMutationWalker extends LocusWalker { protected enum MutantFailureReason { StrandImbalance, Misalignment @@ -210,10 +210,13 @@ public class SomaticMutationWalker extends LocusWalker { return likelihoods; } - public double[] getNormalizedProbs(int locusOffset) { + public double[] getNormalizedProbs(int locusOffset, int qThreshold) { GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); + // FIXME: gaddy suggested this "correction" which evidently has a name... + //likelihoods.add(refBase, refBase, (byte) 20); + for(int i=0; i { char base = read.getReadString().charAt(offset); byte qual = read.getBaseQualities()[offset]; - likelihoods.add(refBase, base, qual); + if (qual >= qThreshold) { + likelihoods.add(refBase, base, qual); +// if (locusOffset == -43) System.out.println("\tUSING " + base + " " + qual); + } else { +// if (locusOffset == -43) System.out.println("\tDropping " + base + " " + qual); + } } } + double[] logLikelihood = likelihoods.likelihoods; double[] nonLogLikelihood = new double[10]; double sum = 0; @@ -282,8 +291,8 @@ public class SomaticMutationWalker extends LocusWalker { List reads = context.getReads(); - LocusReadPile tumorReadPile = new LocusReadPile(ref); - LocusReadPile normalReadPile = new LocusReadPile(ref); + LocusReadPile tumorReadPile = new LocusReadPile(upRef); + LocusReadPile normalReadPile = new LocusReadPile(upRef); for ( int i = 0; i < reads.size(); i++ ) { @@ -327,7 +336,7 @@ public class SomaticMutationWalker extends LocusWalker { } // pretest: if the sum of the quality scores for all non-ref alleles < 60, just quit looking now - if (tumorReadPile.qualitySums.getOtherQualities(ref) < MIN_MUTANT_SUM_PRETEST) { + if (tumorReadPile.qualitySums.getOtherQualities(upRef) < MIN_MUTANT_SUM_PRETEST) { return -1; } @@ -342,7 +351,7 @@ public class SomaticMutationWalker extends LocusWalker { // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must // be at least 6.3; int mutantSum = tumorReadPile.qualitySums.get(altAllele); - int refSum = tumorReadPile.qualitySums.get(ref); + int refSum = tumorReadPile.qualitySums.get(upRef); if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD_THRESHOLD // || @@ -416,19 +425,22 @@ public class SomaticMutationWalker extends LocusWalker { // // by those reads that get thrown out! Maybe that means we don't need the noise filter... // boolean shouldDisalign = // disaligner(context.getPosition(), tumorReadPile, StringUtil.bytesToString(refSeq.getBases()), refStart); - MutantFailureReason failureReason = - readPileSkew(t2, altAllele, StringUtil.bytesToString(refSeq.getBases()), refStart); + Pair failureReason = + readPileSkew(t2, upRef, altAllele, StringUtil.bytesToString(refSeq.getBases()), refStart); - if (mode.equals("full") && failureReason != null) { + if (mode.equals("full") && failureReason.first != null) { if (OUTPUT_FAILURES) { - String msg = "FAILED due to " + failureReason.name(); + String msg = "FAILED due to " + failureReason.first.name() + + "mutAllele_" + altAllele + "_" + + "maxSkewLod_" + failureReason.second + ; out.println( context.getContig() + "\t" + context.getPosition() + "\t" + - context.getPosition() + "\t" - + msg.replace(' ','_') + context.getPosition() + "\t" + + msg.replace(' ','_') ); } @@ -446,11 +458,12 @@ public class SomaticMutationWalker extends LocusWalker { String msg = (failedMidpointCheck?"__FAILED-MPCHECK":"") + "TScore:" + tumorLod + - "__TRefSum: " + tumorReadPile.qualitySums.get(ref) + + "__TRefSum: " + tumorReadPile.qualitySums.get(upRef) + "__TAltSum: " + tumorReadPile.qualitySums.get(altAllele) + "__NScore:" + normalLod + - "__NRefSum: " + normalReadPile.qualitySums.get(ref) + + "__NRefSum: " + normalReadPile.qualitySums.get(upRef) + "__NAltSum: " + normalReadPile.qualitySums.get(altAllele) + + "__maxSkewLod_" + failureReason.second + "_" + "__MIDP: " + midp.get(altAllele); out.println( @@ -487,10 +500,10 @@ public class SomaticMutationWalker extends LocusWalker { - private MutantFailureReason readPileSkew(LocusReadPile pile, char mutantAllele, String reference, long leftmostIndex) { + private Pair readPileSkew(LocusReadPile pile, char refAllele, char mutantAllele, String reference, long leftmostIndex) { // first split into two piles, those supporting the mutant and those not LocusReadPile mutantPile = new LocusReadPile(mutantAllele); - LocusReadPile otherPile = new LocusReadPile(mutantAllele); + LocusReadPile refPile = new LocusReadPile(mutantAllele); for (int i=0; i { if (read.getReadString().charAt(offset) == mutantAllele) { mutantPile.add(read, offset); + } else if (read.getReadString().charAt(offset) == refAllele) { + refPile.add(read, offset); } else { - otherPile.add(read, offset); + // just drop the read... } } @@ -523,13 +538,15 @@ public class SomaticMutationWalker extends LocusWalker { //fixme: this seems to degrade as you lose reads? //fixme: what should the threshold be here? SortedMap skewLodOffsets = new TreeMap(); + double maxSkewLod = 0; for(int offset=-76; offset<76; offset++) { // allow for doubletons if (offset >= -1 && offset <= 1 ) { continue; } - double[] mutantNormProbs = mutantPile.getNormalizedProbs(offset); - double[] otherNormProbs = otherPile.getNormalizedProbs(offset); + int SKEW_QSCORE_THRESHOLD = 10; + double[] mutantNormProbs = mutantPile.getNormalizedProbs(offset, SKEW_QSCORE_THRESHOLD); + double[] otherNormProbs = refPile.getNormalizedProbs(offset, SKEW_QSCORE_THRESHOLD); double J = 0; for(int i=0; i<10; i++) { @@ -537,21 +554,34 @@ public class SomaticMutationWalker extends LocusWalker { } double skewLod = Math.log10( (1-J) / J); - if (skewLod > SKEW_LOD_THRESHOLD) { -// System.out.println( "Offset: " + offset + -// " mutant_reads: " + mutantPile.getLocusBases(offset).length() + -// " other_reads: " + otherPile.getLocusBases(offset).length() + -// " skewLod:" + skewLod ); - skewLodOffsets.put(offset, skewLod); + int mutantReadCounts = mutantPile.getLocusBases(offset).length(); + int otherReadCounts = refPile.getLocusBases(offset).length(); + if (mutantReadCounts >= 2 && otherReadCounts >= 2) { + + if (skewLod > maxSkewLod) { maxSkewLod = skewLod; } + if (skewLod > SKEW_LOD_THRESHOLD) { + + // fixme: is it legit to require that we see it in at least 2 reads each? +// System.out.println( "Offset: " + offset + +// " mutant_reads: " + mutantReadCounts + +// " mutant bases: " + mutantPile.getLocusBases(offset) + +// " other_reads: " + otherReadCounts + +// " other bases: " + refPile.getLocusBases(offset) + +// " skewLod:" + skewLod ); + + skewLodOffsets.put(offset, skewLod); + } } } if (skewLodOffsets.size() > 0) { - return MutantFailureReason.Misalignment; + return new Pair(MutantFailureReason.Misalignment, maxSkewLod); + } else { + return new Pair(null, maxSkewLod); +// System.out.println("MAX SKEW: " + maxSkewLod); } - return null; } int MAX_READ_MISMATCH_QUALITY_SCORE_SUM = 100;