compress Reduce Read counts bytes by offset

Compressing the representation of the reduced read counts, by storing each count as an offset from the first count, results in 17% average compression in the final BAM file size.

Example compression -->

from: 10, 10, 11, 11, 12, 12, 12, 11, 10
to:   10, 0, 1, 1, 2, 2, 2, 1, 0
This commit is contained in:
Mauricio Carneiro 2011-11-14 18:07:41 -05:00
parent a1ce3d8141
commit cde829899d
2 changed files with 6 additions and 3 deletions

View File

@ -188,7 +188,9 @@ public class GATKSAMRecord extends BAMRecord {
}
/**
 * Returns the read count at position {@code i}, decoded from the offset
 * representation held in the array returned by {@code getReducedReadCounts()}.
 *
 * <p>Element 0 of that array is used as the base count and is returned as-is
 * for {@code i == 0}. For every other position the stored value is treated as
 * an offset and added to the base count; the sum is capped at
 * {@link Byte#MAX_VALUE} before being narrowed back to a {@code byte}.
 *
 * <p>NOTE(review): there is no null or length check here; presumably callers
 * only invoke this on reads that carry the counts array. Confirm against callers.
 *
 * @param i index into the counts array
 * @return the base count for {@code i == 0}, otherwise base count plus the
 *         stored offset, capped at {@link Byte#MAX_VALUE}
 * @throws ArrayIndexOutOfBoundsException if the counts array is empty or
 *         {@code i} is outside its bounds
 */
public final byte getReducedCount(final int i) {
    // Fetch the array once; both the base count and the offset come from it.
    final byte[] counts = getReducedReadCounts();
    final byte firstCount = counts[0];
    if (i == 0) {
        return firstCount;
    }
    // byte + byte is promoted to int, so the cap is applied before narrowing.
    return (byte) Math.min(firstCount + counts[i], Byte.MAX_VALUE);
}

View File

@ -16,7 +16,8 @@ public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30}; // just the offsets
@BeforeTest
public void init() {
@ -29,7 +30,7 @@ public class ReadUtilsUnitTest extends BaseTest {
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
private void testReadBasesAndQuals(GATKSAMRecord read, int expectedStart, int expectedStop) {