From 6d22485a4cd3fa715a66c330b176ede0a42017b4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 5 Apr 2013 13:50:03 -0400 Subject: [PATCH] Critical bugfix to ReduceRead functionality of the GATKSAMRecord -- The function getReducedCounts() was returning the undecoded reduced read tag, which looks like [10, 5, -1, -5] when the depths were [10, 15, 9, 5]. The only function that actually gave the real counts was getReducedCount(int i) which did the proper decoding. Now GATKSAMRecord decodes the tag into the proper depths vector so that getReduceCounts() returns what one reasonably expects it to, and getReduceCount(i) merely looks up the value at i. Added unit test to ensure this behavior going forward. -- Changed the name of setReducedCounts() to setReducedCountsTag as this function assumes that counts have already been encoded in the tag way. --- .../reducereads/SyntheticRead.java | 2 +- .../sting/utils/sam/GATKSAMRecord.java | 61 +++++++++++++++++-- .../utils/sam/GATKSAMRecordUnitTest.java | 8 +++ 3 files changed, 64 insertions(+), 7 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index b1ac19f50..ae4366768 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -235,7 +235,7 @@ public class SyntheticRead { read.setReadBases(convertReadBases()); read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size())); read.setReadGroup(readGroupRecord); - read.setReducedReadCounts(convertBaseCounts()); + read.setReducedReadCountsTag(convertBaseCounts()); if (hasIndelQualities) { read.setBaseQualities(convertInsertionQualities(), EventType.BASE_INSERTION); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 01f39a67b..0e672b3d7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -345,24 +345,50 @@ public class GATKSAMRecord extends BAMRecord { // *** ReduceReads functions ***// /////////////////////////////////////////////////////////////////////////////// + /** + * Get the counts of the bases in this reduced read + * + * NOTE that this is not the value of the REDUCED_READ_CONSENSUS_TAG, which + * is encoded in a special way. This is the actual positive counts of the + * depth at each bases. So for a RR with a tag of: + * + * [10, 5, -1, -5] + * + * this function returns + * + * [10, 15, 9, 5] + * + * as one might expect. + * + * @return a byte[] holding the depth of the bases in this reduced read, or null if this isn't a reduced read + */ public byte[] getReducedReadCounts() { if ( ! retrievedReduceReadCounts ) { - reducedReadCounts = getByteArrayAttribute(REDUCED_READ_CONSENSUS_TAG); + final byte[] tag = getByteArrayAttribute(REDUCED_READ_CONSENSUS_TAG); + if ( tag != null ) reducedReadCounts = decodeReadReadCounts(tag); retrievedReduceReadCounts = true; } return reducedReadCounts; } + /** + * Is this read a reduced read? + * @return true if yes + */ public boolean isReducedRead() { return getReducedReadCounts() != null; } /** - * Set the reduced read counts for this record to counts + * Set the reduced read counts tag for this record to counts + * + * WARNING -- this function assumes that counts is encoded as a difference in value count + * of count[i] - count[0]. It is not a straight counting of the bases in the read. + * * @param counts the count array */ - public void setReducedReadCounts(final byte[] counts) { + public void setReducedReadCountsTag(final byte[] counts) { retrievedReduceReadCounts = false; setAttribute(REDUCED_READ_CONSENSUS_TAG, counts); } @@ -374,9 +400,32 @@ public class GATKSAMRecord extends BAMRecord { * @return the number of bases corresponding to the i'th base of the reduced read */ public final byte getReducedCount(final int i) { - byte firstCount = getReducedReadCounts()[0]; - byte offsetCount = getReducedReadCounts()[i]; - return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE); + return getReducedReadCounts()[i]; + } + + /** + * Actually decode the consensus tag of a reduce read, returning a newly allocated + * set of values countsFromTag to be the real depth of cover at each base of the reduced read. + * + * for example, if the tag contains [10, 5, -1, -5], after running this function the + * byte[] will contain the true counts [10, 15, 9, 5]. + * + * as one might expect. + * + * @param countsFromTag a non-null byte[] containing the tag encoded reduce reads counts + * @return a non-null byte[] containing the true depth values for the vector + */ + private byte[] decodeReadReadCounts(final byte[] countsFromTag) { + final int n = countsFromTag.length; + final byte[] result = new byte[n]; + final byte firstCount = countsFromTag[0]; + result[0] = firstCount; + for ( int i = 1; i < n; i++) { + final byte offsetCount = countsFromTag[i]; + result[i] = (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE); + } + + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java index 18a501b51..57a7946ae 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -134,4 +134,12 @@ public class GATKSAMRecordUnitTest extends BaseTest { read.setIsStrandless(true); read.setReadNegativeStrandFlag(true); } + + @Test + public void testGetReducedCountsIsCorrect() { + final byte[] counts = reducedRead.getReducedReadCounts(); + Assert.assertNotSame(counts, reducedRead.getAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG)); + for ( int i = 0; i < counts.length; i++ ) + Assert.assertEquals(counts[i], reducedRead.getReducedCount(i), "Reduced counts vector not equal to getReducedCount(i) at " + i); + } }