diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java index a73f6abd4..00afe3f72 100644 --- a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java @@ -184,7 +184,7 @@ abstract class GLFRecord { * * @param out the binary codec to write to */ - public void write( BinaryCodec out ) { + void write( BinaryCodec out ) { out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) )); out.writeUInt(( (Long) offset ).intValue()); out.writeUInt((new Long(readDepth).intValue())); @@ -203,6 +203,8 @@ abstract class GLFRecord { * * @return the size of this record type, in bytes */ - public abstract int getByteSize(); + public int getByteSize() { + return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1) + } } diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java index 8fbe5ebd7..05acf6497 100755 --- a/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java @@ -28,12 +28,20 @@ import java.io.DataOutputStream; /** * @author aaron * @version 1.0 + * + * This class writes GLF files. You can either specify GLFRecords, or programaticly generate + * single and variable length genotype calls using the provided functions. When you've finished + * generating GLF records, make sure you close the file. + * */ public class GLFWriter { // our output codec private final BinaryCodec outputBinaryCodec; + // the glf magic number, which identifies a properly formatted GLF file public static final short[] glfMagic = {'G', 'L', 'F', '\3'}; + + // our header text, reference sequence name (i.e. chr1), and it's length private String headerText = ""; private String referenceSequenceName = ""; private long referenceSequenceLength = 0; @@ -42,7 +50,7 @@ public class GLFWriter { * The public constructor for creating a GLF object * * @param headerText the header text (currently unclear what the contents are) - * @param referenceSequenceName the reference sequence name + * @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc */ public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) { this.headerText = headerText; @@ -71,7 +79,6 @@ public class GLFWriter { SinglePointCall call = new SinglePointCall(refBase, genomicLoc, readDepth, rmsMapQ, - lhValues.toByteArray(), lhValues); call.write(this.outputBinaryCodec); } @@ -84,9 +91,9 @@ public class GLFWriter { * @param readDepth the read depth at the specified postion * @param rmsMapQ the root mean square of the mapping quality * @param minimumLikelihood the minimum likelihood value - * @param homozygProb1 the probability of the first homozygous indel allele - * @param homozygProb2 the probability of the second homozygous indel allele - * @param heterozygProb the probability of the heterozygote + * @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 + * @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 + * @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255 * @param indelLength1 the length of the first indel allele * @param indelLength2 the length of the second indel allele * @param indelSeq1 the sequence for the first indel allele @@ -140,7 +147,10 @@ public class GLFWriter { } /** - * Write out the header information for the GLF file + * Write out the header information for the GLF file. The header contains + * the magic number, the length of the header text, the text itself, the reference + * sequence (null terminated) preceeded by it's length, and the the genomic + * length of the reference sequence. * */ private void writeHeader() { @@ -157,7 +167,8 @@ public class GLFWriter { } /** - * close the file + * close the file. You must close the file to ensure any remaining data gets + * written out. * */ public void close() { diff --git a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java index 20fa2896e..8c82baa7a 100755 --- a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java @@ -46,6 +46,9 @@ public class LikelihoodObject { AA, AT, AC, AG, CC, CT, CG, GG, GT, TT } + // how many genotypes we're storing + public static final int genoTypeCount = GENOTYPE.values().length; + // the associated negitive log likelihoods for each genotype final protected HashMap likelihood = new HashMap(); @@ -57,7 +60,7 @@ public class LikelihoodObject { } // since the default case it to start with all infinity, we can choose any random base - GENOTYPE greatest = GENOTYPE.AA; + protected GENOTYPE greatest = GENOTYPE.AA; /** @@ -69,11 +72,16 @@ public class LikelihoodObject { throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length); } int index = 0; + int lowestScore = 256; for (GENOTYPE type : GENOTYPE.values()) { if (values[index] < 0 || values[index] > 255) { throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]); } likelihood.put(type, values[index]); + if (values[index] < lowestScore) { + lowestScore = values[index]; + greatest = type; + } ++index; } } diff --git a/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java index 8f2c61030..8f13344c9 100644 --- a/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java +++ b/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java @@ -29,37 +29,33 @@ import net.sf.samtools.util.BinaryCodec; */ /** - * - * @author aaron - * - * Class SinglePointCall - * - * This class represents a single point geneotype call in GLF vernacular - **/ + * @author aaron + *

+ * Class SinglePointCall + *

+ * This class represents a single point geneotype call in GLF vernacular + */ class SinglePointCall extends GLFRecord { - // our likelyhood array size - public static final int LIKELYHOOD_SIZE = 10; - // our record type private final RECORD_TYPE type = RECORD_TYPE.SINGLE; - // our array of likelihoods - public final short lk[] = new short[LIKELYHOOD_SIZE]; + // our likelihood object + private LikelihoodObject likelihoods; - // our size, we're immutable (the size at least). In bytes. - private final int byteSize; + /** + * create a single + * + * @param refBase the reference base, as a char + * @param offset the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lhValues the LikelihoodObject, representing the genotype likelyhoods + */ + SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) { + super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ); - - SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) { - super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ); - - if (lk.length != LIKELYHOOD_SIZE) { - throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE"); - } - - System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE); - byteSize = 9 + lk.length; + likelihoods = lhValues; } @@ -68,15 +64,17 @@ class SinglePointCall extends GLFRecord { * * @param out */ - public void write( BinaryCodec out ) { + void write( BinaryCodec out ) { super.write(out); - for (int x = 0; x < LIKELYHOOD_SIZE; x++) { - out.writeUByte(lk[x]); + short array[] = likelihoods.toByteArray(); + for (int x = 0; x < array.length; x++) { + out.writeUByte(array[x]); } } /** * return the record type we represent, in this case SINGLE + * * @return RECORD_TYPE.SINGLE */ public RECORD_TYPE getRecordType() { @@ -85,10 +83,11 @@ class SinglePointCall extends GLFRecord { /** * return our size in bytes + * * @return number of bytes we represent */ public int getByteSize() { - return byteSize; + return likelihoods.genoTypeCount + super.getByteSize(); } } diff --git a/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java index aa8cd207e..a98d837d7 100644 --- a/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java +++ b/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java @@ -30,12 +30,12 @@ import net.sf.samtools.util.BinaryCodec; /** * @author aaron - *

- * Class VariableLengthCall - *

- * This class represents variable length genotype calls in the GLF format. - * Currently a lot of parameters need to be provided, but we may be able to thin - * those down as we understand what we have to specify and what we can infer. + *

+ * Class VariableLengthCall + *

+ * This class represents variable length genotype calls in the GLF format. + * Currently a lot of parameters need to be provided, but we may be able to thin + * those down as we understand what we have to specify and what we can infer. */ class VariableLengthCall extends GLFRecord { // our fields, corresponding to the glf spec @@ -51,7 +51,22 @@ class VariableLengthCall extends GLFRecord { private final int size; - /** the default constructor */ + /** + * the default constructor + * + * @param refBase the reference base + * @param offset the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param minimumLikelihood the minimum likelihood value + * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 + * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 + * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255 + * @param indelLen1 the length of the first indel allele + * @param indelLen2 the length of the second indel allele + * @param indelSeq1 the sequence for the first indel allele + * @param indelSeq2 the sequence for the second indel allele + */ VariableLengthCall( char refBase, long offset, int readDepth, @@ -81,7 +96,7 @@ class VariableLengthCall extends GLFRecord { * * @param out the binary codec to write to */ - public void write( BinaryCodec out ) { + void write( BinaryCodec out ) { super.write(out); out.writeByte(lkHom1); out.writeByte(lkHom2); @@ -101,8 +116,8 @@ class VariableLengthCall extends GLFRecord { return RECORD_TYPE.VARIABLE; } - /** @return */ + /** @return the size of the record, which is the size of our fields plus the generic records fields */ public int getByteSize() { - return size; + return size + super.getByteSize(); } } diff --git a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java index 34c20c7c0..61a17dce9 100755 --- a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java +++ b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java @@ -86,6 +86,34 @@ public class LikelihoodObjectTest extends BaseTest { } } + @Test + public void testDefaultArrayValues() { + mLO = new LikelihoodObject(); + short[] ret = mLO.toByteArray(); + for (int index = 0; index < ret.length; index++) { + assertTrue(ret[index] == 255); + } + } + + @Test + public void testGetMinimum() { + int[] ray = new int[10]; + for (int x = 0; x < 10; x++) { + ray[x] = ( 240 ); + } + ray [5] = 0; + mLO = new LikelihoodObject(ray); + assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); + short smallest = (short)mLO.getMinimumValue(); + assertTrue(smallest == 0); + int index = 0; + short[] ret = mLO.toByteArray(); + for (index = 0; index < ret.length; index++) { + assertTrue(smallest <= ret[index]); + } + } + + @Test public void testSetLikelihood() { mLO = new LikelihoodObject();