Compress reduced read count bytes by storing offsets

Representing the reduced read counts as offsets from the first count results in a 17% average reduction in final BAM file size.

Example compression -->

from : 10, 10, 11, 11, 12, 12, 12, 11, 10
to:      10, 0, 1, 1, 2, 2, 2, 1, 0
This commit is contained in:
Mauricio Carneiro 2011-11-14 18:07:41 -05:00
parent a1ce3d8141
commit cde829899d
2 changed files with 6 additions and 3 deletions

View File

@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
} }
public final byte getReducedCount(final int i) { public final byte getReducedCount(final int i) {
return getReducedReadCounts()[i]; byte firstCount = getReducedReadCounts()[0];
byte offsetCount = getReducedReadCounts()[i];
return (i==0) ? firstCount : (byte) Math.min(firstCount + offsetCount, Byte.MAX_VALUE);
} }

View File

@ -17,6 +17,7 @@ public class ReadUtilsUnitTest extends BaseTest {
final static String BASES = "ACTG"; final static String BASES = "ACTG";
final static String QUALS = "!+5?"; final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
@BeforeTest @BeforeTest
public void init() { public void init() {
@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes()); reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS); reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS); reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
} }
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) { private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {