From 0c46845c92dad370521a7c80ad0c6779c901f019 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 4 Oct 2012 10:37:11 -0400 Subject: [PATCH] Refactored the BaseCounts classes so that they are safer and allow for calculations on the most probable base (which is not necessarily the most common base). --- .../reducereads/BaseAndQualsCounts.java | 15 ++- .../compression/reducereads/BaseCounts.java | 96 ++++++++++++------- .../reducereads/HeaderElement.java | 2 +- .../reducereads/SlidingWindow.java | 10 +- .../reducereads/BaseCountsUnitTest.java | 2 +- 5 files changed, 75 insertions(+), 50 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java index 98a96fbfb..d5afc5722 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java @@ -23,7 +23,7 @@ public class BaseAndQualsCounts extends BaseCounts { } } - public void incr(byte base, byte baseQual, byte insQual, byte delQual) { + public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual) { super.incr(base, baseQual); BaseIndex i = BaseIndex.byteToBase(base); if (i != null) { // do not allow Ns @@ -32,7 +32,7 @@ public class BaseAndQualsCounts extends BaseCounts { } } - public void decr(byte base, byte baseQual, byte insQual, byte delQual) { + public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual) { super.decr(base, baseQual); BaseIndex i = BaseIndex.byteToBase(base); if (i != null) { // do not allow Ns @@ -41,16 +41,15 @@ public class BaseAndQualsCounts extends BaseCounts { } } - public byte averageInsertionQualsOfMostCommonBase() { - return getGenericAverageQualOfMostCommonBase(sumInsertionQuals); + public byte averageInsertionQualsOfBase(final BaseIndex base) { + return getGenericAverageQualOfBase(base, sumInsertionQuals); } - public byte averageDeletionQualsOfMostCommonBase() { - return getGenericAverageQualOfMostCommonBase(sumDeletionQuals); + public byte averageDeletionQualsOfBase(final BaseIndex base) { + return getGenericAverageQualOfBase(base, sumDeletionQuals); } - private byte getGenericAverageQualOfMostCommonBase(Map sumQuals) { - BaseIndex base = BaseIndex.byteToBase(baseWithMostCounts()); + private byte getGenericAverageQualOfBase(final BaseIndex base, final Map sumQuals) { return (byte) (sumQuals.get(base) / getCount(base)); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java index 53c36c3f9..3da2a32c3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java @@ -41,26 +41,26 @@ import java.util.Map; @Requires("other != null") public void add(BaseCounts other) { - for (BaseIndex i : BaseIndex.values()) + for (final BaseIndex i : BaseIndex.values()) counts.put(i, counts.get(i) + other.counts.get(i)); } @Requires("other != null") public void sub(BaseCounts other) { - for (BaseIndex i : BaseIndex.values()) + for (final BaseIndex i : BaseIndex.values()) counts.put(i, counts.get(i) - other.counts.get(i)); } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1") public void incr(byte base) { - BaseIndex i = BaseIndex.byteToBase(base); + final BaseIndex i = BaseIndex.byteToBase(base); if (i != null) // no Ns counts.put(i, counts.get(i) + 1); } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1") public void incr(byte base, byte qual) { - BaseIndex i = BaseIndex.byteToBase(base); + final BaseIndex i = BaseIndex.byteToBase(base); if (i != null) { // no Ns counts.put(i, counts.get(i) + 1); sumQuals.put(i, sumQuals.get(i) + qual); @@ -69,14 +69,14 @@ import java.util.Map; @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1") public void decr(byte base) { - BaseIndex i = BaseIndex.byteToBase(base); + final BaseIndex i = BaseIndex.byteToBase(base); if (i != null) // no Ns counts.put(i, counts.get(i) - 1); } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1") public void decr(byte base, byte qual) { - BaseIndex i = BaseIndex.byteToBase(base); + final BaseIndex i = BaseIndex.byteToBase(base); if (i != null) { // no Ns counts.put(i, counts.get(i) - 1); sumQuals.put(i, sumQuals.get(i) - qual); @@ -84,52 +84,48 @@ import java.util.Map; } @Ensures("result >= 0") - public int getCount(byte base) { + public int getCount(final byte base) { return getCount(BaseIndex.byteToBase(base)); } @Ensures("result >= 0") - public int getCount(BaseIndex base) { + public int getCount(final BaseIndex base) { return counts.get(base); } @Ensures("result >= 0") - public long getSumQuals(byte base) { + public long getSumQuals(final byte base) { return getSumQuals(BaseIndex.byteToBase(base)); } @Ensures("result >= 0") - public long getSumQuals(BaseIndex base) { + public long getSumQuals(final BaseIndex base) { return sumQuals.get(base); } @Ensures("result >= 0") - public byte averageQuals(byte base) { + public byte averageQuals(final byte base) { return (byte) (getSumQuals(base) / getCount(base)); } @Ensures("result >= 0") - public byte averageQuals(BaseIndex base) { + public byte averageQuals(final BaseIndex base) { return (byte) (getSumQuals(base) / getCount(base)); } - public byte baseWithMostCounts() { - return baseIndexWithMostCounts().getByte(); + @Ensures("result >= 0") + public int countOfBase(final BaseIndex base) { + return counts.get(base); } @Ensures("result >= 0") - public int countOfMostCommonBase() { - return counts.get(baseIndexWithMostCounts()); + public long sumQualsOfBase(final BaseIndex base) { + return sumQuals.get(base); } @Ensures("result >= 0") - public long sumQualsOfMostCommonBase() { - return sumQuals.get(baseIndexWithMostCounts()); - } - - @Ensures("result >= 0") - public byte averageQualsOfMostCommonBase() { - return (byte) (sumQualsOfMostCommonBase() / countOfMostCommonBase()); + public byte averageQualsOfBase(final BaseIndex base) { + return (byte) (sumQualsOfBase(base) / countOfBase(base)); } @@ -149,7 +145,7 @@ import java.util.Map; * @return the proportion of this base over all other bases */ @Ensures({"result >=0.0", "result<= 1.0"}) - public double baseCountProportion(byte base) { + public double baseCountProportion(final byte base) { return (double) counts.get(BaseIndex.byteToBase(base)) / totalCount(); } @@ -160,7 +156,7 @@ import java.util.Map; * @return the proportion of this base over all other bases */ @Ensures({"result >=0.0", "result<= 1.0"}) - public double baseCountProportion(BaseIndex baseIndex) { + public double baseCountProportion(final BaseIndex baseIndex) { int total = totalCount(); if (total == 0) return 0.0; @@ -177,22 +173,28 @@ import java.util.Map; return b.toString(); } + public byte baseWithMostCounts() { + return baseIndexWithMostCounts().getByte(); + } + @Ensures("result != null") public BaseIndex baseIndexWithMostCounts() { BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; - for (BaseIndex i : counts.keySet()) - if (hasHigherCount(i, maxI)) - maxI = i; + for (Map.Entry entry : counts.entrySet()) { + if (entry.getValue() > counts.get(maxI)) + maxI = entry.getKey(); + } return maxI; } @Ensures("result != null") public BaseIndex baseIndexWithMostCountsWithoutIndels() { - BaseIndex mostCounts = MAX_BASE_INDEX_WITH_NO_COUNTS; - for (BaseIndex index : counts.keySet()) - if (index.isNucleotide() && hasHigherCount(index, mostCounts)) - mostCounts = index; - return mostCounts; + BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + for (Map.Entry entry : counts.entrySet()) { + if (entry.getKey().isNucleotide() && entry.getValue() > counts.get(maxI)) + maxI = entry.getKey(); + } + return maxI; } private boolean hasHigherCount(final BaseIndex targetIndex, final BaseIndex testIndex) { @@ -201,6 +203,30 @@ import java.util.Map; return ( targetCount > testCount || (targetCount == testCount && sumQuals.get(targetIndex) > sumQuals.get(testIndex)) ); } + public byte baseWithMostProbability() { + return baseIndexWithMostProbability().getByte(); + } + + @Ensures("result != null") + public BaseIndex baseIndexWithMostProbability() { + BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + for (Map.Entry entry : sumQuals.entrySet()) { + if (entry.getValue() > sumQuals.get(maxI)) + maxI = entry.getKey(); + } + return maxI; + } + + @Ensures("result != null") + public BaseIndex baseIndexWithMostProbabilityWithoutIndels() { + BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + for (Map.Entry entry : sumQuals.entrySet()) { + if (entry.getKey().isNucleotide() && entry.getValue() > sumQuals.get(maxI)) + maxI = entry.getKey(); + } + return maxI; + } + @Ensures("result >=0") public int totalCountWithoutIndels() { int sum = 0; @@ -218,8 +244,8 @@ import java.util.Map; */ @Requires("index.isNucleotide()") @Ensures({"result >=0.0", "result<= 1.0"}) - public double baseCountProportionWithoutIndels(BaseIndex index) { - int total = totalCountWithoutIndels(); + public double baseCountProportionWithoutIndels(final BaseIndex index) { + final int total = totalCountWithoutIndels(); if (total == 0) return 0.0; return (double) counts.get(index) / totalCountWithoutIndels(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index 3fc438b19..0c1854ad1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -182,7 +182,7 @@ public class HeaderElement { * @return whether or not the HeaderElement is variant due to excess insertions */ private boolean isVariantFromMismatches(double minVariantProportion) { - BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostCountsWithoutIndels(); + BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels(); double mostCommonProportion = consensusBaseCounts.baseCountProportionWithoutIndels(mostCommon); return mostCommonProportion != 0.0 && mostCommonProportion < (1 - minVariantProportion); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 6c588898c..00e4d12c6 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -472,11 +472,11 @@ public class SlidingWindow { * @param rms the rms mapping quality in the header element */ private void genericAddBaseToConsensus(SyntheticRead syntheticRead, BaseAndQualsCounts baseCounts, double rms) { - BaseIndex base = baseCounts.baseIndexWithMostCounts(); - byte count = (byte) Math.min(baseCounts.countOfMostCommonBase(), Byte.MAX_VALUE); - byte qual = baseCounts.averageQualsOfMostCommonBase(); - byte insQual = baseCounts.averageInsertionQualsOfMostCommonBase(); - byte delQual = baseCounts.averageDeletionQualsOfMostCommonBase(); + BaseIndex base = baseCounts.baseIndexWithMostProbability(); + byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE); + byte qual = baseCounts.averageQualsOfBase(base); + byte insQual = baseCounts.averageInsertionQualsOfBase(base); + byte delQual = baseCounts.averageDeletionQualsOfBase(base); syntheticRead.add(base, count, qual, insQual, delQual, rms); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java index a8707641a..3e5cbf0e8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java @@ -63,7 +63,7 @@ public class BaseCountsUnitTest extends BaseTest { String name = String.format("Test-%s", params.bases); Assert.assertEquals(counts.totalCount(), params.bases.length(), name); - Assert.assertEquals(counts.countOfMostCommonBase(), params.mostCommonCount, name); + Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name); Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name); } } \ No newline at end of file