diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java index bacfb65c8..6e3d0d8f3 100755 --- a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java @@ -40,14 +40,43 @@ public class GLFRecord { private String referenceSequenceName = ""; private long referenceSequenceLength = 0; + private int currentOffset = -1; /** * The public constructor for creating a GLF object * @param headerText the header text (currently unclear what the contents are) * @param referenceSequenceName the reference sequence name */ - public GLFRecord(String headerText, String referenceSequenceName) { + public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) { this.headerText = headerText; this.referenceSequenceName = referenceSequenceName; + this.referenceSequenceLength = referenceSequenceLength; + } + + public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) { + if (currentOffset >= genomicLoc) { + throw new IllegalArgumentException("The location supplied is less then a previous location"); + } + + // make sure the read depth is not negative and fits in 24 bits (0 to 0x00ffffff) + if (read_depth < 0 || read_depth > 0x00FFFFFF) { + throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff"); + } + + // check that the RMS mapping quality is not negative, and will fit in a uint8 + if (rmsMapQ > 0x000000FF || rmsMapQ < 0) { + throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff"); + } + + if (lhValues == null) { + throw new IllegalArgumentException("likelihood object cannot be null"); + } + + SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset, + read_depth, + rmsMapQ, + lhValues.toByteArray(), + (short)lhValues.getMinimumValue()); // NOTE(review): call is not stored and currentOffset is not advanced within this method - confirm this is work in progress + + } /** @@ -84,22 +113,35 @@ interface RecordType { } }; + /** + * 
write the record out to a binary codec + * @param out the binary codec to write this record to + */ public void write(BinaryCodec out); + /** + * get the record type + * @return the record type + */ public RECORD_TYPE getRecordType(); + /** + * get the size of the record + * @return the size of the record, in bytes + */ + public int getByteSize(); } // the second record type -class VariableLengthGenotype implements RecordType { - public int offset; - public int min_depth; - public byte rmsMapQ; - public byte lkHom1; - public byte lkHom2; - public byte lkHet; - public short indelLen1; - public short indelLen2; +class VariableLengthCall implements RecordType { + public int offset = 0; + public int min_depth = 0; + public byte rmsMapQ = 0; + public byte lkHom1 = 0; + public byte lkHom2 = 0; + public byte lkHet = 0; + public short indelLen1 = 0; + public short indelLen2 = 0; public final byte indelSeq1[]; public final byte indelSeq2[]; @@ -111,7 +153,7 @@ class VariableLengthGenotype implements RecordType { * * @param in the binary codec to get data from */ - VariableLengthGenotype(BinaryCodec in) { + VariableLengthCall(BinaryCodec in) { offset = in.readInt(); min_depth = in.readInt(); rmsMapQ = in.readByte(); @@ -146,19 +188,25 @@ class VariableLengthGenotype implements RecordType { public RECORD_TYPE getRecordType() { return RECORD_TYPE.VARIABLE; } + + /** @return the value of this record's size field */ + public int getByteSize() { + return size; + } } // the first record type -class SinglePointGenotype implements RecordType { +class SinglePointCall implements RecordType { // our likelyhood array size public static final int LIKELYHOOD_SIZE = 10; // class fields - public int offset; - public int min_depth; - public byte rmsMapQ; - public final byte lk[] = new byte[LIKELYHOOD_SIZE]; + public int offset = 0; + public Long min_depth = 0L; + public int rmsMapQ = 0; + public final short lk[] = new short[LIKELYHOOD_SIZE]; + public short minimumLikelihood = 0; // our size, we're immutable (the size at least) private final int byteSize; // in bytes @@ -171,13 +219,14 @@ class SinglePointGenotype implements 
RecordType { * @param rmsMapQ * @param lk */ - SinglePointGenotype(int offset, int min_depth, byte rmsMapQ, byte[] lk) { + SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) { if (lk.length != LIKELYHOOD_SIZE) { - throw new IllegalArgumentException("SinglePointGenotype: passed in likelyhood array size != LIKELYHOOD_SIZE"); + throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE"); } this.offset = offset; this.min_depth = min_depth; this.rmsMapQ = rmsMapQ; + this.minimumLikelihood = minimumLikelihood; // NOTE(review): not read or written by the BinaryCodec constructor / write() shown in this diff - confirm System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE); byteSize = 9 + lk.length; } @@ -187,11 +236,13 @@ class SinglePointGenotype implements RecordType { * * @param in the binary codec to get data from */ - SinglePointGenotype(BinaryCodec in) { + SinglePointCall(BinaryCodec in) { offset = in.readInt(); - min_depth = in.readInt(); + min_depth = Long.valueOf(in.readInt()); rmsMapQ = in.readByte(); - in.readBytes(lk); + for (int x = 0; x < LIKELYHOOD_SIZE; x++) { + lk[x] = in.readUByte(); + } byteSize = 9 + lk.length; } @@ -202,13 +253,20 @@ class SinglePointGenotype implements RecordType { */ public void write(BinaryCodec out) { out.writeInt(offset); - out.writeInt(min_depth); + out.writeInt(min_depth.intValue()); out.writeByte(rmsMapQ); - out.writeBytes(lk); + for (int x = 0; x < LIKELYHOOD_SIZE; x++) { + out.writeUByte(lk[x]); + } } public RECORD_TYPE getRecordType() { return RECORD_TYPE.SINGLE; } + /** @return the size of this record, in bytes */ + public int getByteSize() { + return byteSize; + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java index 865c778d6..015ca1dff 100755 --- a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java @@ -84,14 +84,28 @@ public class LikelihoodObject { } /** - * 
return a byte array representation of the likelihood object, in GLFv3 specified order + * find the minimum likelihood value stored in the set + * @return the smallest stored likelihood value, or Integer.MAX_VALUE if none are stored + */ + public int getMinimumValue() { + int minimum = Integer.MAX_VALUE; + for (int i : likelihood.values()) { + if (i < minimum) { minimum = i;} + } + return minimum; + } + + /** + * return a byte array representation of the likelihood object, in GLFv3 specified order. + * The return type is short[] instead of byte[], since signed bytes only store -128 to 127, + * not the 0 to 255 range we need. * @return a byte array of the genotype values */ - public int[] toByteArray() { - int ret[] = new int[GENOTYPE.values().length]; + public short[] toByteArray() { + short ret[] = new short[GENOTYPE.values().length]; int index = 0; for (GENOTYPE type : GENOTYPE.values()) { - ret[index] = likelihood.get(type); + ret[index] = (short)likelihood.get(type).intValue(); ++index; } return ret; diff --git a/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java b/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java index 17f1ebda7..b56ab20d3 100755 --- a/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java +++ b/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java @@ -1,6 +1,10 @@ package org.broadinstitute.sting.utils.glf; import org.junit.Test; +import org.junit.Before; +import net.sf.samtools.util.BinaryCodec; + +import java.io.File; /* @@ -37,9 +41,37 @@ import org.junit.Test; */ public class GLFRecordTest { - @Test - public void basicWrite() { + /** some made up values that we use to generate the GLF */ + private final String header = "header"; + private final String referenceSequenceName = "refSeq"; + private final int refLength = 1000; + + private GLFRecord rec; + + @Before + public void before() { + rec = new GLFRecord(header, referenceSequenceName, refLength); } + /** + * make a fake snp + * @param genotype index into LikelihoodObject.GENOTYPE.values(); documented as 0-15 (AA, AT, AA, ... 
GG) - NOTE(review): SinglePointCall accepts exactly 10 likelihoods, so confirm this range + */ + private void addFakeSNP(int genotype, int location) { + LikelihoodObject obj = new LikelihoodObject(); + obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype],0.5f); + rec.addSNPCall(location,10,10,obj); + } + + + @Test + public void basicWrite() { + File writeTo = new File("testGLF.glf"); + BinaryCodec codec = new BinaryCodec(writeTo, true); // NOTE(review): codec is never written to or closed in this test - confirm + for (int x = 0; x < 100; x++) { + addFakeSNP(0,x); + } + } + } diff --git a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java index 129ea56f0..f223e55c2 100755 --- a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java +++ b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java @@ -80,7 +80,7 @@ public class LikelihoodObjectTest extends BaseTest { assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); int index = 0; - int[] ret = mLO.toByteArray(); + short[] ret = mLO.toByteArray(); for (index = 0; index < ret.length; index++) { assertTrue(ray[index] == ret[index]); } @@ -95,7 +95,7 @@ public class LikelihoodObjectTest extends BaseTest { assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); int index = 0; - int[] ret = mLO.toByteArray(); + short[] ret = mLO.toByteArray(); for (index = 0; index < ret.length; index++) { assertTrue(ret[index] == 128); }