From 6a25f0b9c58ab3fa0e0509cca208671b7182e9bb Mon Sep 17 00:00:00 2001 From: kcibul Date: Thu, 2 Jul 2009 14:37:54 +0000 Subject: [PATCH] refactored into new package git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1158 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/cancer/LocusReadPile.java | 184 ++++++++++++++ .../gatk/walkers/cancer/QualitySums.java | 58 +++++ .../{ => cancer}/SomaticCoverageWalker.java | 2 +- .../{ => cancer}/SomaticMutationWalker.java | 229 ++---------------- 4 files changed, 260 insertions(+), 213 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java create mode 100644 java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java rename java/src/org/broadinstitute/sting/playground/gatk/walkers/{ => cancer}/SomaticCoverageWalker.java (98%) rename java/src/org/broadinstitute/sting/playground/gatk/walkers/{ => cancer}/SomaticMutationWalker.java (81%) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java new file mode 100644 index 000000000..e4d25fb0c --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/LocusReadPile.java @@ -0,0 +1,184 @@ +package org.broadinstitute.sting.playground.gatk.walkers.cancer; + +import net.sf.samtools.SAMRecord; + +import java.util.List; +import java.util.ArrayList; + +import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; + +/** + * Created by IntelliJ IDEA. + * User: kcibul + * Date: Jun 27, 2009 + * Time: 3:41:59 PM + * To change this template use File | Settings | File Templates. + */ +public class LocusReadPile { + public List reads = new ArrayList(); + public List offsets = new ArrayList(); + public char refBase; + public int minQualityScore; + public GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); + public QualitySums qualitySums = new QualitySums(); + + public LocusReadPile(char refBase) { + this(refBase, 0); + } + + public LocusReadPile(char refBase, int minQualityScore) { + this.refBase = refBase; + this.minQualityScore = minQualityScore; + } + + public void add(SAMRecord read, int offset) { + char base = read.getReadString().charAt(offset); + byte qual = read.getBaseQualities()[offset]; + + if (base == 'N' || base == 'n') { return; } + + reads.add(read); + offsets.add(offset); + + + likelihoods.add(refBase, base, qual); + if (qual > this.minQualityScore) qualitySums.incrementSum(base, qual); + + } + + public String getLocusBases() { + return getLocusBases(0); + } + + public String getLocusBases(int locusOffset) { + StringBuilder sb = new StringBuilder(); + for(int i=0; i= 0 && offset < read.getReadString().length()) { + char base = read.getReadString().charAt(offset); + sb.append(base); + } + } + return sb.toString(); + } + + public double getAltVsRef(char altAllele) { + double[] tumorRefAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele); + double tumorLod = Math.log10(tumorRefAlt[1] + tumorRefAlt[2]) - tumorRefAlt[0]; + return tumorLod; + } + + public double getRefVsAlt(char altAllele) { + double[] refAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele); + double normalLod = refAlt[0] - Math.log10(refAlt[1] + refAlt[2]); + return normalLod; + } + + /** + * Extract the LOD comparing ref:ref to ref:alt and alt:alt + */ + private double[] extractRefAlt(GenotypeLikelihoods gl, char ref, char altAllele) { + double refRef = 0; + double altRef = 0; + double altAlt = 0; + for(int j=0; j<10; j++) { + String gt = gl.genotypes[j]; + double likelihood = gl.likelihoods[j]; + + // the ref:mutant theory + if ( (gt.charAt(0) == ref && gt.charAt(1) == altAllele) || + (gt.charAt(0) == altAllele && gt.charAt(1) == ref) ) { + altRef += Math.pow(10, likelihood); + } + + if ( gt.charAt(0) == altAllele && gt.charAt(1) == altAllele) { + altAlt += Math.pow(10, likelihood); + } + + if ( gt.charAt(0) == ref && gt.charAt(1) == ref) { + refRef = likelihood; + } + + } + return new double[]{refRef, altRef, altAlt}; + } + + private GenotypeLikelihoods getLikelihood(int locusOffset) { + GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); + + + for(int i=0; i= 0 && offset < read.getReadString().length()) { + + char base = read.getReadString().charAt(offset); + byte qual = read.getBaseQualities()[offset]; + + likelihoods.add(refBase, base, qual); + } + + } + return likelihoods; + } + + public double[] getNormalizedProbs(int locusOffset, int qThreshold) { + GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); + + + // FIXME: gaddy suggested this "correction" which evidently has a name... + //likelihoods.add(refBase, refBase, (byte) 20); + + for(int i=0; i= 0 && offset < read.getReadString().length()) { + + char base = read.getReadString().charAt(offset); + byte qual = read.getBaseQualities()[offset]; + + if (qual >= qThreshold) { + likelihoods.add(refBase, base, qual); +// if (locusOffset == -43) System.out.println("\tUSING " + base + " " + qual); + } else { +// if (locusOffset == -43) System.out.println("\tDropping " + base + " " + qual); + } + } + + } + + + double[] logLikelihood = likelihoods.likelihoods; + double[] nonLogLikelihood = new double[10]; + double sum = 0; + for(int i=0; i<10; i++) { + nonLogLikelihood[i] = Math.pow(10, logLikelihood[i]); + sum += nonLogLikelihood[i]; + } + + double[] normalizedProbs = new double[10]; + for(int i=0; i<10; i++) { + normalizedProbs[i] = nonLogLikelihood[i] / sum; + } + + + //quick sanity check +// sum=0; +// for(int i=0; i<10; i++) { +// sum += normalizedProbs[i]; +// } +// System.out.println("normalized probs = " + sum); + + + return normalizedProbs; + } + + +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java new file mode 100644 index 000000000..714389560 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/QualitySums.java @@ -0,0 +1,58 @@ +package org.broadinstitute.sting.playground.gatk.walkers.cancer; + +/** + * Created by IntelliJ IDEA. + * User: kcibul + * Date: Jun 27, 2009 + * Time: 3:43:29 PM + * To change this template use File | Settings | File Templates. + */ +public class QualitySums { + private int a = 0; + private int c = 0; + private int g = 0; + private int t = 0; + private int aCounts = 0; + private int cCounts = 0; + private int gCounts = 0; + private int tCounts = 0; + + public int getQualitySum(final char base) { + if (base == 'a' || base == 'A') { return a; } + if (base == 'c' || base == 'C') { return c; } + if (base == 'g' || base == 'G') { return g; } + if (base == 't' || base == 'T') { return t; } + throw new RuntimeException("Unknown base: " + base); + } + + public int getCounts(final char base) { + if (base == 'a' || base == 'A') { return aCounts; } + if (base == 'c' || base == 'C') { return cCounts; } + if (base == 'g' || base == 'G') { return gCounts; } + if (base == 't' || base == 'T') { return tCounts; } + throw new RuntimeException("Unknown base: " + base); + } + + public void incrementSum(final char base, final byte qual) { + if (base == 'a' || base == 'A') { a += qual; aCounts++;} + else if (base == 'c' || base == 'C') { c += qual; cCounts++;} + else if (base == 'g' || base == 'G') { g += qual; gCounts++; } + else if (base == 't' || base == 'T') { t += qual; tCounts++; } + else throw new RuntimeException("Unknown base: " + base); + } + + public int getOtherQualities(final char base) { + int total = a + c + g + t; + if (base == 'a' || base == 'A') { return total-a; } + else if (base == 'c' || base == 'C') { return total-c; } + else if (base == 'g' || base == 'G') { return total-g; } + else if (base == 't' || base == 'T') { return total-t; } + else throw new RuntimeException("Unknown base: " + base); + } + + public void reset() { + a = 0; c = 0; g = 0; t = 0; + aCounts = 0; cCounts = 0; gCounts = 0; tCounts = 0; + } +} + diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticCoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java similarity index 98% rename from java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticCoverageWalker.java rename to java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java index 329641316..005fe1229 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticCoverageWalker.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.playground.gatk.walkers; +package org.broadinstitute.sting.playground.gatk.walkers.cancer; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java similarity index 81% rename from java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java rename to java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java index eabbaf7cc..046da3d3a 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SomaticMutationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/cancer/SomaticMutationWalker.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.playground.gatk.walkers; +package org.broadinstitute.sting.playground.gatk.walkers.cancer; import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.Cigar; @@ -27,42 +27,6 @@ public class SomaticMutationWalker extends LocusWalker { StrandImbalance, Misalignment } - protected static class QualitySums { - private int a = 0; - private int c = 0; - private int g = 0; - private int t = 0; - - public int get(final char base) { - if (base == 'a' || base == 'A') { return a; } - if (base == 'c' || base == 'C') { return c; } - if (base == 'g' || base == 'G') { return g; } - if (base == 't' || base == 'T') { return t; } - throw new RuntimeException("Unknown base: " + base); - } - - public void incrementSum(final char base, final byte qual) { - if (base == 'a' || base == 'A') { a += qual; } - else if (base == 'c' || base == 'C') { c += qual; } - else if (base == 'g' || base == 'G') { g += qual; } - else if (base == 't' || base == 'T') { t += qual; } - else throw new RuntimeException("Unknown base: " + base); - } - - public int getOtherQualities(final char base) { - int total = a + c + g + t; - if (base == 'a' || base == 'A') { return total-a; } - else if (base == 'c' || base == 'C') { return total-c; } - else if (base == 'g' || base == 'G') { return total-g; } - else if (base == 't' || base == 'T') { return total-t; } - else throw new RuntimeException("Unknown base: " + base); - } - - public void reset() { - a = 0; c = 0; g = 0; t = 0; - } - } - @Argument(fullName = "tumor_sample_name", shortName = "s1", required = true, doc="Name of the tumor sample") public String tumorSampleName; @@ -103,168 +67,6 @@ public class SomaticMutationWalker extends LocusWalker { public static int MIN_QSCORE = 13; - private static class LocusReadPile { - private char refBase; - public List reads = new ArrayList(); - public List offsets = new ArrayList(); - public GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); - public QualitySums qualitySums = new QualitySums(); - - public LocusReadPile(char refBase) { - this.refBase = refBase; - } - - public void add(SAMRecord read, int offset) { - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - if (base == 'N' || base == 'n') { return; } - - reads.add(read); - offsets.add(offset); - - - likelihoods.add(refBase, base, qual); - if (qual > MIN_QSCORE) qualitySums.incrementSum(base, qual); - - } - - public String getLocusBases() { - return getLocusBases(0); - } - - public String getLocusBases(int locusOffset) { - StringBuilder sb = new StringBuilder(); - for(int i=0; i= 0 && offset < read.getReadString().length()) { - char base = read.getReadString().charAt(offset); - sb.append(base); - } - } - return sb.toString(); - } - - public double getAltVsRef(char altAllele) { - double[] tumorRefAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele); - double tumorLod = Math.log10(tumorRefAlt[1] + tumorRefAlt[2]) - tumorRefAlt[0]; - return tumorLod; - } - - public double getRefVsAlt(char altAllele) { - double[] refAlt = extractRefAlt(this.likelihoods, this.refBase, altAllele); - double normalLod = refAlt[0] - Math.log10(refAlt[1] + refAlt[2]); - return normalLod; - } - - /** - * Extract the LOD comparing ref:ref to ref:alt and alt:alt - */ - private double[] extractRefAlt(GenotypeLikelihoods gl, char ref, char altAllele) { - double refRef = 0; - double altRef = 0; - double altAlt = 0; - for(int j=0; j<10; j++) { - String gt = gl.genotypes[j]; - double likelihood = gl.likelihoods[j]; - - // the ref:mutant theory - if ( (gt.charAt(0) == ref && gt.charAt(1) == altAllele) || - (gt.charAt(0) == altAllele && gt.charAt(1) == ref) ) { - altRef += Math.pow(10, likelihood); - } - - if ( gt.charAt(0) == altAllele && gt.charAt(1) == altAllele) { - altAlt += Math.pow(10, likelihood); - } - - if ( gt.charAt(0) == ref && gt.charAt(1) == ref) { - refRef = likelihood; - } - - } - return new double[]{refRef, altRef, altAlt}; - } - - private GenotypeLikelihoods getLikelihood(int locusOffset) { - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); - - - for(int i=0; i= 0 && offset < read.getReadString().length()) { - - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - likelihoods.add(refBase, base, qual); - } - - } - return likelihoods; - } - - public double[] getNormalizedProbs(int locusOffset, int qThreshold) { - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(); - - - // FIXME: gaddy suggested this "correction" which evidently has a name... - //likelihoods.add(refBase, refBase, (byte) 20); - - for(int i=0; i= 0 && offset < read.getReadString().length()) { - - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - - if (qual >= qThreshold) { - likelihoods.add(refBase, base, qual); -// if (locusOffset == -43) System.out.println("\tUSING " + base + " " + qual); - } else { -// if (locusOffset == -43) System.out.println("\tDropping " + base + " " + qual); - } - } - - } - - - double[] logLikelihood = likelihoods.likelihoods; - double[] nonLogLikelihood = new double[10]; - double sum = 0; - for(int i=0; i<10; i++) { - nonLogLikelihood[i] = Math.pow(10, logLikelihood[i]); - sum += nonLogLikelihood[i]; - } - - double[] normalizedProbs = new double[10]; - for(int i=0; i<10; i++) { - normalizedProbs[i] = nonLogLikelihood[i] / sum; - } - - - //quick sanity check -// sum=0; -// for(int i=0; i<10; i++) { -// sum += normalizedProbs[i]; -// } -// System.out.println("normalized probs = " + sum); - - - return normalizedProbs; - } - - } - public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) { Map midp = new HashMap(); midp.put('A', Integer.MAX_VALUE); @@ -291,8 +93,8 @@ public class SomaticMutationWalker extends LocusWalker { List reads = context.getReads(); - LocusReadPile tumorReadPile = new LocusReadPile(upRef); - LocusReadPile normalReadPile = new LocusReadPile(upRef); + LocusReadPile tumorReadPile = new LocusReadPile(upRef,MIN_QSCORE); + LocusReadPile normalReadPile = new LocusReadPile(upRef,MIN_QSCORE); for ( int i = 0; i < reads.size(); i++ ) { @@ -350,8 +152,8 @@ public class SomaticMutationWalker extends LocusWalker { // (i) either an adjusted quality score sum in the tumor for the mutant base must be // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must // be at least 6.3; - int mutantSum = tumorReadPile.qualitySums.get(altAllele); - int refSum = tumorReadPile.qualitySums.get(upRef); + int mutantSum = tumorReadPile.qualitySums.getQualitySum(altAllele); + int refSum = tumorReadPile.qualitySums.getQualitySum(upRef); if (tumorReadPile.getAltVsRef(altAllele) >= TUMOR_LOD_THRESHOLD // || @@ -365,7 +167,10 @@ public class SomaticMutationWalker extends LocusWalker { // (ii) the quality score sum for the mutant base in the normal must be < 50 and the // LOD score for ref:ref vs mutant:ref + mutant:mutant must be at least 2.3. double normalLod = normalReadPile.getRefVsAlt(altAllele); - if ( normalReadPile.qualitySums.get(altAllele) > 50 || normalLod < NORMAL_LOD_THRESHOLD) { + QualitySums normQs = normalReadPile.qualitySums; + + if ( (normQs.getCounts(altAllele) >= 2 && normQs.getQualitySum(altAllele) > 20) + || normalLod < NORMAL_LOD_THRESHOLD) { continue; } @@ -402,14 +207,14 @@ public class SomaticMutationWalker extends LocusWalker { // at least 100 or the LOD score for mutant:ref + mutant:mutant vs ref:ref must // be at least 6.3; double tumorLod = t2.getAltVsRef(altAllele); - if (mode.equals("full") && t2.qualitySums.get(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) { + if (mode.equals("full") && t2.qualitySums.getQualitySum(altAllele) < MIN_MUTANT_SUM && tumorLod < TUMOR_LOD_THRESHOLD) { if (OUTPUT_FAILURES) { String msg = "FAILED due to MAX MM QSCORE TEST." + " LOD was " + tumorReadPile.getAltVsRef(altAllele) + " LOD is now " + t2.getAltVsRef(altAllele) + - " QSUM was " + tumorReadPile.qualitySums.get(altAllele) + - " QSUM is now " + t2.qualitySums.get(altAllele); + " QSUM was " + tumorReadPile.qualitySums.getQualitySum(altAllele) + + " QSUM is now " + t2.qualitySums.getQualitySum(altAllele); out.println( context.getContig() + "\t" + @@ -458,11 +263,11 @@ public class SomaticMutationWalker extends LocusWalker { String msg = (failedMidpointCheck?"__FAILED-MPCHECK":"") + "TScore:" + tumorLod + - "__TRefSum: " + tumorReadPile.qualitySums.get(upRef) + - "__TAltSum: " + tumorReadPile.qualitySums.get(altAllele) + + "__TRefSum: " + tumorReadPile.qualitySums.getQualitySum(upRef) + + "__TAltSum: " + tumorReadPile.qualitySums.getQualitySum(altAllele) + "__NScore:" + normalLod + - "__NRefSum: " + normalReadPile.qualitySums.get(upRef) + - "__NAltSum: " + normalReadPile.qualitySums.get(altAllele) + + "__NRefSum: " + normalReadPile.qualitySums.getQualitySum(upRef) + + "__NAltSum: " + normalReadPile.qualitySums.getQualitySum(altAllele) + "__maxSkewLod_" + failureReason.second + "_" + "__MIDP: " + midp.get(altAllele); @@ -528,7 +333,7 @@ public class SomaticMutationWalker extends LocusWalker { if (read.getReadNegativeStrandFlag()) { seenOnNegative = true; } else { seenOnPositive = true; } } if (!seenOnPositive || !seenOnNegative) { -// return MutantFailureReason.StrandImbalance; + return new Pair(MutantFailureReason.StrandImbalance, 0d); }