diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java
old mode 100755
new mode 100644
index 6e3d0d8f3..a73f6abd4
--- a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java
@@ -2,271 +2,207 @@ package org.broadinstitute.sting.utils.glf;
import net.sf.samtools.util.BinaryCodec;
-import java.util.ArrayList;
-import java.util.List;
-/**
+/*
+ * Copyright (c) 2009 The Broad Institute
*
- * User: aaron
- * Date: May 13, 2009
- * Time: 3:36:18 PM
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
*
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
-
/**
* @author aaron
- * @version 1.0
- * @date May 13, 2009
- *
- * Class GLFRecord
- *
- * The base record that's stored in the GLF format.
+ *
+ * Class RecordType
+ *
+ * The base record type for all GLF entries. Each record has a number of fields
+ * common to the record set. This is also the source of the REF_BASE enumeration,
+ * which represents the accepted FASTA nucleotide symbols and their assocated GLF
+ * field values.
*/
-public class GLFRecord {
- // our record list
- private final List records = new ArrayList();
+abstract class GLFRecord {
- public static final byte[] glfMagic = {'G','L','F','\3'};
- private String headerText = "";
- private String referenceSequenceName = "";
- private long referenceSequenceLength = 0;
+ // fields common to all records
+ protected REF_BASE refBase;
+ protected long offset = 1;
+ protected short minimumLikelihood = 0;
+ protected int readDepth = 0;
+ protected short rmsMapQ = 0;
- private int currentOffset = -1;
- /**
- * The public constructor for creating a GLF object
- * @param headerText the header text (currently unclear what the contents are)
- * @param referenceSequenceName the reference sequence name
- */
- public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) {
- this.headerText = headerText;
- this.referenceSequenceName = referenceSequenceName;
- this.referenceSequenceLength = referenceSequenceLength;
- }
+ // the size of this base structure
+ protected final int baseSize = 10;
- public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) {
- if (currentOffset >= genomicLoc) {
- throw new IllegalArgumentException("The location supplied is less then a previous location");
- }
+ /** the reference base enumeration, with their short (type) values for GLF */
+ public enum REF_BASE {
+ X((short) 0x00),
+ A((short) 0x01),
+ C((short) 0x02),
+ M((short) 0x03),
+ G((short) 0x04),
+ R((short) 0x05),
+ S((short) 0x06),
+ V((short) 0x07),
+ T((short) 0x08),
+ W((short) 0x09),
+ Y((short) 0x0A),
+ H((short) 0x0B),
+ K((short) 0x0C),
+ D((short) 0x0D),
+ B((short) 0x0E),
+ N((short) 0x0F);
- // make sure the read depth isn't too large
- if (read_depth < 0 || read_depth > 0x00FFFFFF) {
- throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff");
- }
-
- // check that the rmsSquare is greater then 0, and will fit in a uint8
- if (rmsMapQ > 0x000000FF || rmsMapQ < 0) {
- throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff");
- }
-
- if (lhValues == null) {
- throw new IllegalArgumentException("likelihood object cannot be null");
- }
-
- SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset,
- read_depth,
- rmsMapQ,
- lhValues.toByteArray(),
- (short)lhValues.getMinimumValue());
-
- }
-
- /**
- * Write out the record to a binary codec object
- *
- * @param out
- */
- public void write(BinaryCodec out) {
- out.writeBytes(glfMagic);
- out.writeString(headerText,true,true);
- out.writeString(referenceSequenceName,true,true);
- out.writeUInt(referenceSequenceLength);
- for (RecordType rec: records) {
- out.writeUByte(rec.getRecordType().getFieldValue());
- rec.write(out);
- }
- out.writeByte((byte)0);
- }
-
-}
-
-
-interface RecordType {
-
- enum RECORD_TYPE {
- VARIABLE((short)2),
- SINGLE((short)1);
private final short fieldValue; // in kilograms
- RECORD_TYPE(short value) {
+
+ /**
+ * private constructor, used by the enum class to makes each enum value
+ * @param value the short values specified in the enum listing
+ */
+ REF_BASE( short value ) {
fieldValue = value;
}
- public short getFieldValue() {
+
+ /**
+ * static method from returning a REF_BASE given the character representation
+ *
+ * @param value the character representation of a REF_BASE
+ *
+ * @return the corresponding REF_BASE
+ * @throws IllegalArgumentException if the value passed can't be converted
+ */
+ public static REF_BASE toBase( char value ) {
+ String str = String.valueOf(value).toUpperCase();
+ for (int x = 0; x < REF_BASE.values().length; x++) {
+ if (REF_BASE.values()[x].toString().equals(str)) {
+ return REF_BASE.values()[x];
+ }
+ }
+ throw new IllegalArgumentException("Counldn't find matching reference base for " + str);
+ }
+
+ /** @return the hex value of the given REF_BASE */
+ public short getBaseHexValue() {
return fieldValue;
}
- };
+ }
+
+ /** the record type enum, which enumerates the different records we can have in a GLF */
+ enum RECORD_TYPE {
+ SINGLE((short) 1),
+ VARIABLE((short) 2);
+
+ private final short fieldValue; // in kilograms
+
+ RECORD_TYPE( short value ) {
+ fieldValue = value;
+ }
+
+ public short getReadTypeValue() {
+ return fieldValue;
+ }
+ }
+
/**
- * write the record out to a binary codec
- * @param out
+ * Constructor, given the base a character reference base
+ *
+ * @param base the reference base in the reference
+ * @param offset the offset from the beginning of the reference seq
+ * @param minimumLikelihood it's minimum likelihood
+ * @param readDepth the read depth at this position
+ * @param rmsMapQ the root mean square of the mapping quality
*/
- public void write(BinaryCodec out);
+ public GLFRecord( char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
+ REF_BASE newBase = REF_BASE.toBase(base);
+ validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ);
+ }
+
+ /**
+ * Constructor, given the base a REF_BASE
+ *
+ * @param base the reference base in the reference
+ * @param offset the offset from the beginning of the reference seq
+ * @param minimumLikelihood it's minimum likelihood
+ * @param readDepth the read depth at this position
+ * @param rmsMapQ the root mean square of the mapping quality
+ */
+ GLFRecord( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
+ validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ);
+ }
+
+ /**
+ * validate the input during construction, and store valid values
+ *
+ * @param base the reference base in the reference, as a REF_BASE
+ * @param offset the offset from the beginning of the reference seq
+ * @param minimumLikelihood it's minimum likelihood
+ * @param readDepth the read depth at this position
+ * @param rmsMapQ the root mean square of the mapping quality
+ */
+ private void validateInput( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
+ this.refBase = base;
+ if (offset > 4294967295L || offset < 0) {
+ throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset);
+ }
+ this.offset = offset;
+
+ if (minimumLikelihood > 255 || minimumLikelihood < 0) {
+ throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
+ }
+ this.minimumLikelihood = minimumLikelihood;
+
+ if (readDepth > 16777215 || readDepth < 0) {
+ throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth);
+ }
+ this.readDepth = readDepth;
+
+ if (rmsMapQ > 255 || rmsMapQ < 0) {
+ throw new IllegalArgumentException("rmsMapQ is out of bounds (0 to 0xff) value passed = " + rmsMapQ);
+ }
+ this.rmsMapQ = rmsMapQ;
+ }
+
+ /**
+ * write the this record to a binary codec output.
+ *
+ * @param out the binary codec to write to
+ */
+ public void write( BinaryCodec out ) {
+ out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
+ out.writeUInt(( (Long) offset ).intValue());
+ out.writeUInt((new Long(readDepth).intValue()));
+ out.writeUByte((short) rmsMapQ);
+ }
/**
* get the record type
- * @return the record type
+ *
+ * @return the record type enumeration
*/
- public RECORD_TYPE getRecordType();
+ public abstract RECORD_TYPE getRecordType();
/**
- *
- * @return
+ * Return the size of this record in bytes.
+ *
+ * @return the size of this record type, in bytes
*/
- public int getByteSize();
+ public abstract int getByteSize();
}
-// the second record type
-class VariableLengthCall implements RecordType {
- public int offset = 0;
- public int min_depth = 0;
- public byte rmsMapQ = 0;
- public byte lkHom1 = 0;
- public byte lkHom2 = 0;
- public byte lkHet = 0;
- public short indelLen1 = 0;
- public short indelLen2 = 0;
- public final byte indelSeq1[];
- public final byte indelSeq2[];
-
- // our size, we're immutable (at least the size is)
- private final int size; // in bytes
-
- /**
- * the BinaryCodec constructor
- *
- * @param in the binary codec to get data from
- */
- VariableLengthCall(BinaryCodec in) {
- offset = in.readInt();
- min_depth = in.readInt();
- rmsMapQ = in.readByte();
- lkHom1 = in.readByte();
- lkHom2 = in.readByte();
- lkHet = in.readByte();
- indelLen1 = in.readShort();
- indelLen2 = in.readShort();
- indelSeq1 = new byte[indelLen1];
- indelSeq2 = new byte[indelLen2];
- this.size = 16 + indelLen1 + indelLen2;
- }
-
- /**
- * Write out the record to a binary codec
- *
- * @param out
- */
- public void write(BinaryCodec out) {
- out.writeInt(offset);
- out.writeInt(min_depth);
- out.writeByte(rmsMapQ);
- out.writeByte(lkHom1);
- out.writeByte(lkHom2);
- out.writeByte(lkHet);
- out.writeShort(indelLen1);
- out.writeShort(indelLen2);
- out.writeBytes(indelSeq1);
- out.writeBytes(indelSeq2);
- }
-
- public RECORD_TYPE getRecordType() {
- return RECORD_TYPE.VARIABLE;
- }
-
- /** @return */
- public int getByteSize() {
- return size;
- }
-}
-
-
-// the first record type
-class SinglePointCall implements RecordType {
- // our likelyhood array size
- public static final int LIKELYHOOD_SIZE = 10;
-
- // class fields
- public int offset = 0;
- public Long min_depth = 0L;
- public int rmsMapQ = 0;
- public final short lk[] = new short[LIKELYHOOD_SIZE];
- public short minimumLikelihood = 0;
-
- // our size, we're immutable (the size at least)
- private final int byteSize; // in bytes
-
- /**
- * The parameter constructor
- *
- * @param offset
- * @param min_depth
- * @param rmsMapQ
- * @param lk
- */
- SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) {
- if (lk.length != LIKELYHOOD_SIZE) {
- throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
- }
- this.offset = offset;
- this.min_depth = min_depth;
- this.rmsMapQ = rmsMapQ;
- this.minimumLikelihood = minimumLikelihood;
- System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
- byteSize = 9 + lk.length;
- }
-
- /**
- * the BinaryCodec constructor
- *
- * @param in the binary codec to get data from
- */
- SinglePointCall(BinaryCodec in) {
- offset = in.readInt();
- min_depth = Long.valueOf(in.readInt());
- rmsMapQ = in.readByte();
- for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
- lk[x] = in.readUByte();
- }
- byteSize = 9 + lk.length;
- }
-
- /**
- * Write out the record to a binary codec
- *
- * @param out
- */
- public void write(BinaryCodec out) {
- out.writeInt(offset);
- out.writeInt(min_depth.intValue());
- out.writeByte(rmsMapQ);
- for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
- out.writeUByte(lk[x]);
- }
- }
-
- public RECORD_TYPE getRecordType() {
- return RECORD_TYPE.SINGLE;
- }
-
- /** @return */
- public int getByteSize() {
- return byteSize;
- }
-
-}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java
new file mode 100755
index 000000000..8fbe5ebd7
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java
@@ -0,0 +1,170 @@
+package org.broadinstitute.sting.utils.glf;
+
+import net.sf.samtools.util.BinaryCodec;
+import net.sf.samtools.util.BlockCompressedOutputStream;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.io.File;
+import java.io.DataOutputStream;
+
+/**
+ *
+ * User: aaron
+ * Date: May 13, 2009
+ * Time: 3:36:18 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ * @version 1.0
+ */
+public class GLFWriter {
+ // our output codec
+ private final BinaryCodec outputBinaryCodec;
+
+ public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
+ private String headerText = "";
+ private String referenceSequenceName = "";
+ private long referenceSequenceLength = 0;
+
+ /**
+ * The public constructor for creating a GLF object
+ *
+ * @param headerText the header text (currently unclear what the contents are)
+ * @param referenceSequenceName the reference sequence name
+ */
+ public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
+ this.headerText = headerText;
+ this.referenceSequenceName = referenceSequenceName;
+ this.referenceSequenceLength = referenceSequenceLength;
+ outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo)));
+ outputBinaryCodec.setOutputFileName(writeTo.toString());
+ this.writeHeader();
+ }
+
+ /**
+ * add a point genotype to the GLF writer
+ *
+ * @param refBase the reference base, as a char
+ * @param genomicLoc the location, as an offset from the previous glf record
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param lhValues the LikelihoodObject, representing the genotype likelyhoods
+ */
+ public void addPointCall( char refBase,
+ int genomicLoc,
+ int readDepth,
+ short rmsMapQ,
+ LikelihoodObject lhValues ) {
+
+ SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
+ readDepth,
+ rmsMapQ,
+ lhValues.toByteArray(),
+ lhValues);
+ call.write(this.outputBinaryCodec);
+ }
+
+ /**
+ * add a variable length (indel, deletion, etc) to the genotype writer
+ *
+ * @param refBase the reference base
+ * @param genomicLoc the location, as an offset from the previous glf record
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param minimumLikelihood the minimum likelihood value
+ * @param homozygProb1 the probability of the first homozygous indel allele
+ * @param homozygProb2 the probability of the second homozygous indel allele
+ * @param heterozygProb the probability of the heterozygote
+ * @param indelLength1 the length of the first indel allele
+ * @param indelLength2 the length of the second indel allele
+ * @param indelSeq1 the sequence for the first indel allele
+ * @param indelSeq2 the sequence for the second indel allele
+ */
+ public void addVarLengthCall( char refBase,
+ long genomicLoc,
+ int readDepth,
+ short rmsMapQ,
+ short minimumLikelihood,
+ short homozygProb1,
+ short homozygProb2,
+ short heterozygProb,
+ int indelLength1,
+ int indelLength2,
+ char[] indelSeq1,
+ char[] indelSeq2 ) {
+
+ short[] indexSeqEq1 = new short[indelSeq1.length];
+ short[] indexSeqEq2 = new short[indelSeq2.length];
+ for (int x = 0; x < indelSeq1.length; x++) {
+ indexSeqEq1[x] = new Integer(0x00ff & indelSeq1[x]).shortValue();
+ }
+ for (int x = 0; x < indelSeq2.length; x++) {
+ indexSeqEq2[x] = new Integer(0x00ff & indelSeq2[x]).shortValue();
+ }
+
+ VariableLengthCall call = new VariableLengthCall(refBase,
+ genomicLoc,
+ readDepth,
+ minimumLikelihood,
+ rmsMapQ,
+ homozygProb1,
+ homozygProb2,
+ heterozygProb,
+ indelLength1,
+ indelLength2,
+ indexSeqEq1,
+ indexSeqEq2);
+
+ call.write(this.outputBinaryCodec);
+
+ }
+
+ /**
+ * add a GLF record to the output file
+ * @param rec the GLF record to write.
+ */
+ public void addGLFRecord( GLFRecord rec ) {
+ rec.write(this.outputBinaryCodec);
+ }
+
+ /**
+ * Write out the header information for the GLF file
+ *
+ */
+ private void writeHeader() {
+ for (int x = 0; x < glfMagic.length; x++) {
+ outputBinaryCodec.writeByte(glfMagic[x]);
+ }
+ if (!(headerText.equals(""))) {
+ outputBinaryCodec.writeString(headerText, true, true);
+ } else {
+ outputBinaryCodec.writeInt(0);
+ }
+ outputBinaryCodec.writeString(referenceSequenceName, true, true);
+ outputBinaryCodec.writeUInt(referenceSequenceLength);
+ }
+
+ /**
+ * close the file
+ *
+ */
+ public void close() {
+ outputBinaryCodec.writeByte((byte) 0);
+ outputBinaryCodec.close();
+ }
+
+}
+
+
diff --git a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java
index 015ca1dff..20fa2896e 100755
--- a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java
+++ b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java
@@ -46,19 +46,23 @@ public class LikelihoodObject {
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
}
- // the associated probabilities for each genotype
+ // the associated negitive log likelihoods for each genotype
final protected HashMap likelihood = new HashMap();
/** create a blank likelihood object */
public LikelihoodObject() {
for (GENOTYPE type : GENOTYPE.values()) {
- likelihood.put(type, 0);
+ likelihood.put(type, 255);
}
}
+ // since the default case it to start with all infinity, we can choose any random base
+ GENOTYPE greatest = GENOTYPE.AA;
+
+
/**
* create a likelyhood object, given an array of genotype scores in GLFv3 ordering
- * @param values
+ * @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
*/
public LikelihoodObject(int[] values) {
if (values.length != GENOTYPE.values().length) {
@@ -77,22 +81,25 @@ public class LikelihoodObject {
/**
* set the likelihood, given it's probability and the genotype
* @param type the genotype
- * @param likelyhood the likelihood as a float between 0 and 1, which is converted to a byte
+ * @param lh the likelihood as a double between 0 and 1, which is converted to a byte
*/
- public void setLikelihood(GENOTYPE type, float likelyhood) {
- likelihood.put(type,(int)Math.round(likelyhood*255.0));
+ public void setLikelihood(GENOTYPE type, int lh) {
+ if (lh < 0 || lh > 255) {
+ throw new IllegalArgumentException("supplied likelihood must be between 0 and 255");
+ }
+ likelihood.put(type,lh);
+ if (lh < likelihood.get(this.greatest)) {
+ this.greatest = type;
+ }
}
/**
- * find the minimum likelihood value stored in the set
+ * find the minimum likelihood value stored in the set. This represents the most likely genotype,
+ * since genotypes are represented as negitive log likeihoods
* @return
*/
public int getMinimumValue() {
- int minimum = Integer.MAX_VALUE;
- for (int i : likelihood.values()) {
- if (i < minimum) { minimum = i;}
- }
- return minimum;
+ return likelihood.get(this.greatest);
}
/**
diff --git a/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java
new file mode 100644
index 000000000..8f2c61030
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java
@@ -0,0 +1,94 @@
+package org.broadinstitute.sting.utils.glf;
+
+import net.sf.samtools.util.BinaryCodec;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ *
+ * @author aaron
+ *
+ * Class SinglePointCall
+ *
+ * This class represents a single point geneotype call in GLF vernacular
+ **/
+class SinglePointCall extends GLFRecord {
+
+ // our likelyhood array size
+ public static final int LIKELYHOOD_SIZE = 10;
+
+ // our record type
+ private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
+
+ // our array of likelihoods
+ public final short lk[] = new short[LIKELYHOOD_SIZE];
+
+ // our size, we're immutable (the size at least). In bytes.
+ private final int byteSize;
+
+
+ SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) {
+ super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ);
+
+ if (lk.length != LIKELYHOOD_SIZE) {
+ throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
+ }
+
+ System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
+ byteSize = 9 + lk.length;
+ }
+
+
+ /**
+ * Write out the record to a binary codec
+ *
+ * @param out
+ */
+ public void write( BinaryCodec out ) {
+ super.write(out);
+ for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
+ out.writeUByte(lk[x]);
+ }
+ }
+
+ /**
+ * return the record type we represent, in this case SINGLE
+ * @return RECORD_TYPE.SINGLE
+ */
+ public RECORD_TYPE getRecordType() {
+ return RECORD_TYPE.SINGLE;
+ }
+
+ /**
+ * return our size in bytes
+ * @return number of bytes we represent
+ */
+ public int getByteSize() {
+ return byteSize;
+ }
+
+}
diff --git a/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java
new file mode 100644
index 000000000..aa8cd207e
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java
@@ -0,0 +1,108 @@
+package org.broadinstitute.sting.utils.glf;
+
+import net.sf.samtools.util.BinaryCodec;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class VariableLengthCall
+ *
+ * This class represents variable length genotype calls in the GLF format.
+ * Currently a lot of parameters need to be provided, but we may be able to thin
+ * those down as we understand what we have to specify and what we can infer.
+ */
+class VariableLengthCall extends GLFRecord {
+ // our fields, corresponding to the glf spec
+ private short lkHom1 = 0;
+ private short lkHom2 = 0;
+ private short lkHet = 0;
+ private int indelLen1 = 0;
+ private int indelLen2 = 0;
+ private final short indelSeq1[];
+ private final short indelSeq2[];
+
+ // our size, which is immutable, in bytes
+ private final int size;
+
+
+ /** the default constructor */
+ VariableLengthCall( char refBase,
+ long offset,
+ int readDepth,
+ short minimumLikelihood,
+ short rmsMapQ,
+ short lkHom1,
+ short lkHom2,
+ short lkHet,
+ int indelLen1,
+ int indelLen2,
+ final short indelSeq1[],
+ final short indelSeq2[] ) {
+ super(refBase, offset, minimumLikelihood, readDepth, rmsMapQ);
+ this.lkHom1 = lkHom1;
+ this.lkHom2 = lkHom2;
+ this.lkHet = lkHet;
+ this.indelLen1 = indelLen1;
+ this.indelLen2 = indelLen2;
+ this.indelSeq1 = indelSeq1;
+ this.indelSeq2 = indelSeq2;
+ size = 16 + indelSeq1.length + indelSeq2.length;
+
+ }
+
+ /**
+ * Write out the record to a binary codec
+ *
+ * @param out the binary codec to write to
+ */
+ public void write( BinaryCodec out ) {
+ super.write(out);
+ out.writeByte(lkHom1);
+ out.writeByte(lkHom2);
+ out.writeByte(lkHet);
+ out.writeShort(new Integer(indelLen1).shortValue());
+ out.writeShort(new Integer(indelLen2).shortValue());
+ for (int x = 0; x < indelSeq1.length; x++) {
+ out.writeUByte(indelSeq1[x]);
+ }
+ for (int x = 0; x < indelSeq2.length; x++) {
+ out.writeUByte(indelSeq2[x]);
+ }
+ }
+
+ /** @return RECORD_TYPE.VARIABLE */
+ public RECORD_TYPE getRecordType() {
+ return RECORD_TYPE.VARIABLE;
+ }
+
+ /** @return */
+ public int getByteSize() {
+ return size;
+ }
+}
diff --git a/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java b/java/test/org/broadinstitute/sting/utils/glf/GLFWriterTest.java
similarity index 63%
rename from java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java
rename to java/test/org/broadinstitute/sting/utils/glf/GLFWriterTest.java
index b56ab20d3..928a17620 100755
--- a/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java
+++ b/java/test/org/broadinstitute/sting/utils/glf/GLFWriterTest.java
@@ -2,9 +2,13 @@ package org.broadinstitute.sting.utils.glf;
import org.junit.Test;
import org.junit.Before;
+import org.broadinstitute.sting.BaseTest;
import net.sf.samtools.util.BinaryCodec;
+import net.sf.samtools.util.BlockCompressedOutputStream;
import java.io.File;
+import java.io.DataOutputStream;
+import java.io.IOException;
/*
@@ -39,39 +43,57 @@ import java.io.File;
*
* Tests for the GLFRecord class
*/
-public class GLFRecordTest {
+public class GLFWriterTest extends BaseTest {
/** some made up values that we use to generate the GLF */
- private final String header = "header";
- private final String referenceSequenceName = "refSeq";
+ private final String header = "";
+ private final String referenceSequenceName = "chr1";
private final int refLength = 1000;
+ File writeTo = new File("testGLF.glf");
- private GLFRecord rec;
+ private GLFWriter rec;
@Before
public void before() {
- rec = new GLFRecord(header, referenceSequenceName, refLength);
-
+
}
/**
* make a fake snp
+ *
* @param genotype the genotype, 0-15 (AA, AT, AA, ... GG)
*/
- private void addFakeSNP(int genotype, int location) {
+ private void addFakeSNP( int genotype, int location ) {
LikelihoodObject obj = new LikelihoodObject();
- obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype],0.5f);
- rec.addSNPCall(location,10,10,obj);
+ obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype], 128);
+ int ran = (int) Math.floor(Math.random() * 4.0);
+ char let = 'A';
+ switch (ran) {
+ case 0:
+ let = 'T';
+ break;
+ case 1:
+ let = 'C';
+ break;
+ case 2:
+ let = 'G';
+ break;
+ }
+ try {
+ rec.addPointCall(let, location, 10, (short) 10, obj);
+ } catch (IllegalArgumentException e) {
+ e.printStackTrace();
+ }
}
@Test
public void basicWrite() {
- File writeTo = new File("testGLF.glf");
- BinaryCodec codec = new BinaryCodec(writeTo, true);
+ rec = new GLFWriter(header, referenceSequenceName, refLength, writeTo);
for (int x = 0; x < 100; x++) {
- addFakeSNP(0,x);
+ addFakeSNP((int) Math.round(Math.random() * 9), 1);
}
+ rec.close();
}
}
diff --git a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java
index f223e55c2..34c20c7c0 100755
--- a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java
+++ b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java
@@ -90,7 +90,7 @@ public class LikelihoodObjectTest extends BaseTest {
public void testSetLikelihood() {
mLO = new LikelihoodObject();
for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) {
- mLO.setLikelihood(t,0.5f);
+ mLO.setLikelihood(t,128);
}
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);