diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java old mode 100755 new mode 100644 index 6e3d0d8f3..a73f6abd4 --- a/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/glf/GLFRecord.java @@ -2,271 +2,207 @@ package org.broadinstitute.sting.utils.glf; import net.sf.samtools.util.BinaryCodec; -import java.util.ArrayList; -import java.util.List; -/** +/* + * Copyright (c) 2009 The Broad Institute * - * User: aaron - * Date: May 13, 2009 - * Time: 3:36:18 PM + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ - /** * @author aaron - * @version 1.0 - * @date May 13, 2009 - *

- * Class GLFRecord - *

- * The base record that's stored in the GLF format. + *

+ * Class RecordType + *

+ * The base record type for all GLF entries. Each record has a number of fields + * common to the record set. This is also the source of the REF_BASE enumeration, + * which represents the accepted FASTA nucleotide symbols and their assocated GLF + * field values. */ -public class GLFRecord { - // our record list - private final List records = new ArrayList(); +abstract class GLFRecord { - public static final byte[] glfMagic = {'G','L','F','\3'}; - private String headerText = ""; - private String referenceSequenceName = ""; - private long referenceSequenceLength = 0; + // fields common to all records + protected REF_BASE refBase; + protected long offset = 1; + protected short minimumLikelihood = 0; + protected int readDepth = 0; + protected short rmsMapQ = 0; - private int currentOffset = -1; - /** - * The public constructor for creating a GLF object - * @param headerText the header text (currently unclear what the contents are) - * @param referenceSequenceName the reference sequence name - */ - public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) { - this.headerText = headerText; - this.referenceSequenceName = referenceSequenceName; - this.referenceSequenceLength = referenceSequenceLength; - } + // the size of this base structure + protected final int baseSize = 10; - public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) { - if (currentOffset >= genomicLoc) { - throw new IllegalArgumentException("The location supplied is less then a previous location"); - } + /** the reference base enumeration, with their short (type) values for GLF */ + public enum REF_BASE { + X((short) 0x00), + A((short) 0x01), + C((short) 0x02), + M((short) 0x03), + G((short) 0x04), + R((short) 0x05), + S((short) 0x06), + V((short) 0x07), + T((short) 0x08), + W((short) 0x09), + Y((short) 0x0A), + H((short) 0x0B), + K((short) 0x0C), + D((short) 0x0D), + B((short) 0x0E), + N((short) 0x0F); - // make sure the read depth isn't too large - if (read_depth < 0 || read_depth > 0x00FFFFFF) { - throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff"); - } - - // check that the rmsSquare is greater then 0, and will fit in a uint8 - if (rmsMapQ > 0x000000FF || rmsMapQ < 0) { - throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff"); - } - - if (lhValues == null) { - throw new IllegalArgumentException("likelihood object cannot be null"); - } - - SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset, - read_depth, - rmsMapQ, - lhValues.toByteArray(), - (short)lhValues.getMinimumValue()); - - } - - /** - * Write out the record to a binary codec object - * - * @param out - */ - public void write(BinaryCodec out) { - out.writeBytes(glfMagic); - out.writeString(headerText,true,true); - out.writeString(referenceSequenceName,true,true); - out.writeUInt(referenceSequenceLength); - for (RecordType rec: records) { - out.writeUByte(rec.getRecordType().getFieldValue()); - rec.write(out); - } - out.writeByte((byte)0); - } - -} - - -interface RecordType { - - enum RECORD_TYPE { - VARIABLE((short)2), - SINGLE((short)1); private final short fieldValue; // in kilograms - RECORD_TYPE(short value) { + + /** + * private constructor, used by the enum class to makes each enum value + * @param value the short values specified in the enum listing + */ + REF_BASE( short value ) { fieldValue = value; } - public short getFieldValue() { + + /** + * static method from returning a REF_BASE given the character representation + * + * @param value the character representation of a REF_BASE + * + * @return the corresponding REF_BASE + * @throws IllegalArgumentException if the value passed can't be converted + */ + public static REF_BASE toBase( char value ) { + String str = String.valueOf(value).toUpperCase(); + for (int x = 0; x < REF_BASE.values().length; x++) { + if (REF_BASE.values()[x].toString().equals(str)) { + return REF_BASE.values()[x]; + } + } + throw new IllegalArgumentException("Counldn't find matching reference base for " + str); + } + + /** @return the hex value of the given REF_BASE */ + public short getBaseHexValue() { return fieldValue; } - }; + } + + /** the record type enum, which enumerates the different records we can have in a GLF */ + enum RECORD_TYPE { + SINGLE((short) 1), + VARIABLE((short) 2); + + private final short fieldValue; // in kilograms + + RECORD_TYPE( short value ) { + fieldValue = value; + } + + public short getReadTypeValue() { + return fieldValue; + } + } + /** - * write the record out to a binary codec - * @param out + * Constructor, given the base a character reference base + * + * @param base the reference base in the reference + * @param offset the offset from the beginning of the reference seq + * @param minimumLikelihood it's minimum likelihood + * @param readDepth the read depth at this position + * @param rmsMapQ the root mean square of the mapping quality */ - public void write(BinaryCodec out); + public GLFRecord( char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { + REF_BASE newBase = REF_BASE.toBase(base); + validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ); + } + + /** + * Constructor, given the base a REF_BASE + * + * @param base the reference base in the reference + * @param offset the offset from the beginning of the reference seq + * @param minimumLikelihood it's minimum likelihood + * @param readDepth the read depth at this position + * @param rmsMapQ the root mean square of the mapping quality + */ + GLFRecord( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { + validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ); + } + + /** + * validate the input during construction, and store valid values + * + * @param base the reference base in the reference, as a REF_BASE + * @param offset the offset from the beginning of the reference seq + * @param minimumLikelihood it's minimum likelihood + * @param readDepth the read depth at this position + * @param rmsMapQ the root mean square of the mapping quality + */ + private void validateInput( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { + this.refBase = base; + if (offset > 4294967295L || offset < 0) { + throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset); + } + this.offset = offset; + + if (minimumLikelihood > 255 || minimumLikelihood < 0) { + throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood); + } + this.minimumLikelihood = minimumLikelihood; + + if (readDepth > 16777215 || readDepth < 0) { + throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth); + } + this.readDepth = readDepth; + + if (rmsMapQ > 255 || rmsMapQ < 0) { + throw new IllegalArgumentException("rmsMapQ is out of bounds (0 to 0xff) value passed = " + rmsMapQ); + } + this.rmsMapQ = rmsMapQ; + } + + /** + * write the this record to a binary codec output. + * + * @param out the binary codec to write to + */ + public void write( BinaryCodec out ) { + out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) )); + out.writeUInt(( (Long) offset ).intValue()); + out.writeUInt((new Long(readDepth).intValue())); + out.writeUByte((short) rmsMapQ); + } /** * get the record type - * @return the record type + * + * @return the record type enumeration */ - public RECORD_TYPE getRecordType(); + public abstract RECORD_TYPE getRecordType(); /** - * - * @return + * Return the size of this record in bytes. + * + * @return the size of this record type, in bytes */ - public int getByteSize(); + public abstract int getByteSize(); } -// the second record type -class VariableLengthCall implements RecordType { - public int offset = 0; - public int min_depth = 0; - public byte rmsMapQ = 0; - public byte lkHom1 = 0; - public byte lkHom2 = 0; - public byte lkHet = 0; - public short indelLen1 = 0; - public short indelLen2 = 0; - public final byte indelSeq1[]; - public final byte indelSeq2[]; - - // our size, we're immutable (at least the size is) - private final int size; // in bytes - - /** - * the BinaryCodec constructor - * - * @param in the binary codec to get data from - */ - VariableLengthCall(BinaryCodec in) { - offset = in.readInt(); - min_depth = in.readInt(); - rmsMapQ = in.readByte(); - lkHom1 = in.readByte(); - lkHom2 = in.readByte(); - lkHet = in.readByte(); - indelLen1 = in.readShort(); - indelLen2 = in.readShort(); - indelSeq1 = new byte[indelLen1]; - indelSeq2 = new byte[indelLen2]; - this.size = 16 + indelLen1 + indelLen2; - } - - /** - * Write out the record to a binary codec - * - * @param out - */ - public void write(BinaryCodec out) { - out.writeInt(offset); - out.writeInt(min_depth); - out.writeByte(rmsMapQ); - out.writeByte(lkHom1); - out.writeByte(lkHom2); - out.writeByte(lkHet); - out.writeShort(indelLen1); - out.writeShort(indelLen2); - out.writeBytes(indelSeq1); - out.writeBytes(indelSeq2); - } - - public RECORD_TYPE getRecordType() { - return RECORD_TYPE.VARIABLE; - } - - /** @return */ - public int getByteSize() { - return size; - } -} - - -// the first record type -class SinglePointCall implements RecordType { - // our likelyhood array size - public static final int LIKELYHOOD_SIZE = 10; - - // class fields - public int offset = 0; - public Long min_depth = 0L; - public int rmsMapQ = 0; - public final short lk[] = new short[LIKELYHOOD_SIZE]; - public short minimumLikelihood = 0; - - // our size, we're immutable (the size at least) - private final int byteSize; // in bytes - - /** - * The parameter constructor - * - * @param offset - * @param min_depth - * @param rmsMapQ - * @param lk - */ - SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) { - if (lk.length != LIKELYHOOD_SIZE) { - throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE"); - } - this.offset = offset; - this.min_depth = min_depth; - this.rmsMapQ = rmsMapQ; - this.minimumLikelihood = minimumLikelihood; - System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE); - byteSize = 9 + lk.length; - } - - /** - * the BinaryCodec constructor - * - * @param in the binary codec to get data from - */ - SinglePointCall(BinaryCodec in) { - offset = in.readInt(); - min_depth = Long.valueOf(in.readInt()); - rmsMapQ = in.readByte(); - for (int x = 0; x < LIKELYHOOD_SIZE; x++) { - lk[x] = in.readUByte(); - } - byteSize = 9 + lk.length; - } - - /** - * Write out the record to a binary codec - * - * @param out - */ - public void write(BinaryCodec out) { - out.writeInt(offset); - out.writeInt(min_depth.intValue()); - out.writeByte(rmsMapQ); - for (int x = 0; x < LIKELYHOOD_SIZE; x++) { - out.writeUByte(lk[x]); - } - } - - public RECORD_TYPE getRecordType() { - return RECORD_TYPE.SINGLE; - } - - /** @return */ - public int getByteSize() { - return byteSize; - } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java new file mode 100755 index 000000000..8fbe5ebd7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/glf/GLFWriter.java @@ -0,0 +1,170 @@ +package org.broadinstitute.sting.utils.glf; + +import net.sf.samtools.util.BinaryCodec; +import net.sf.samtools.util.BlockCompressedOutputStream; + +import java.util.ArrayList; +import java.util.List; +import java.io.File; +import java.io.DataOutputStream; + +/** + * + * User: aaron + * Date: May 13, 2009 + * Time: 3:36:18 PM + * + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ + + +/** + * @author aaron + * @version 1.0 + */ +public class GLFWriter { + // our output codec + private final BinaryCodec outputBinaryCodec; + + public static final short[] glfMagic = {'G', 'L', 'F', '\3'}; + private String headerText = ""; + private String referenceSequenceName = ""; + private long referenceSequenceLength = 0; + + /** + * The public constructor for creating a GLF object + * + * @param headerText the header text (currently unclear what the contents are) + * @param referenceSequenceName the reference sequence name + */ + public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) { + this.headerText = headerText; + this.referenceSequenceName = referenceSequenceName; + this.referenceSequenceLength = referenceSequenceLength; + outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo))); + outputBinaryCodec.setOutputFileName(writeTo.toString()); + this.writeHeader(); + } + + /** + * add a point genotype to the GLF writer + * + * @param refBase the reference base, as a char + * @param genomicLoc the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lhValues the LikelihoodObject, representing the genotype likelyhoods + */ + public void addPointCall( char refBase, + int genomicLoc, + int readDepth, + short rmsMapQ, + LikelihoodObject lhValues ) { + + SinglePointCall call = new SinglePointCall(refBase, genomicLoc, + readDepth, + rmsMapQ, + lhValues.toByteArray(), + lhValues); + call.write(this.outputBinaryCodec); + } + + /** + * add a variable length (indel, deletion, etc) to the genotype writer + * + * @param refBase the reference base + * @param genomicLoc the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param minimumLikelihood the minimum likelihood value + * @param homozygProb1 the probability of the first homozygous indel allele + * @param homozygProb2 the probability of the second homozygous indel allele + * @param heterozygProb the probability of the heterozygote + * @param indelLength1 the length of the first indel allele + * @param indelLength2 the length of the second indel allele + * @param indelSeq1 the sequence for the first indel allele + * @param indelSeq2 the sequence for the second indel allele + */ + public void addVarLengthCall( char refBase, + long genomicLoc, + int readDepth, + short rmsMapQ, + short minimumLikelihood, + short homozygProb1, + short homozygProb2, + short heterozygProb, + int indelLength1, + int indelLength2, + char[] indelSeq1, + char[] indelSeq2 ) { + + short[] indexSeqEq1 = new short[indelSeq1.length]; + short[] indexSeqEq2 = new short[indelSeq2.length]; + for (int x = 0; x < indelSeq1.length; x++) { + indexSeqEq1[x] = new Integer(0x00ff & indelSeq1[x]).shortValue(); + } + for (int x = 0; x < indelSeq2.length; x++) { + indexSeqEq2[x] = new Integer(0x00ff & indelSeq2[x]).shortValue(); + } + + VariableLengthCall call = new VariableLengthCall(refBase, + genomicLoc, + readDepth, + minimumLikelihood, + rmsMapQ, + homozygProb1, + homozygProb2, + heterozygProb, + indelLength1, + indelLength2, + indexSeqEq1, + indexSeqEq2); + + call.write(this.outputBinaryCodec); + + } + + /** + * add a GLF record to the output file + * @param rec the GLF record to write. + */ + public void addGLFRecord( GLFRecord rec ) { + rec.write(this.outputBinaryCodec); + } + + /** + * Write out the header information for the GLF file + * + */ + private void writeHeader() { + for (int x = 0; x < glfMagic.length; x++) { + outputBinaryCodec.writeByte(glfMagic[x]); + } + if (!(headerText.equals(""))) { + outputBinaryCodec.writeString(headerText, true, true); + } else { + outputBinaryCodec.writeInt(0); + } + outputBinaryCodec.writeString(referenceSequenceName, true, true); + outputBinaryCodec.writeUInt(referenceSequenceLength); + } + + /** + * close the file + * + */ + public void close() { + outputBinaryCodec.writeByte((byte) 0); + outputBinaryCodec.close(); + } + +} + + diff --git a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java index 015ca1dff..20fa2896e 100755 --- a/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/glf/LikelihoodObject.java @@ -46,19 +46,23 @@ public class LikelihoodObject { AA, AT, AC, AG, CC, CT, CG, GG, GT, TT } - // the associated probabilities for each genotype + // the associated negitive log likelihoods for each genotype final protected HashMap likelihood = new HashMap(); /** create a blank likelihood object */ public LikelihoodObject() { for (GENOTYPE type : GENOTYPE.values()) { - likelihood.put(type, 0); + likelihood.put(type, 255); } } + // since the default case it to start with all infinity, we can choose any random base + GENOTYPE greatest = GENOTYPE.AA; + + /** * create a likelyhood object, given an array of genotype scores in GLFv3 ordering - * @param values + * @param values an array of int's from 0 to 255, representing the negitive log likelihoods. */ public LikelihoodObject(int[] values) { if (values.length != GENOTYPE.values().length) { @@ -77,22 +81,25 @@ public class LikelihoodObject { /** * set the likelihood, given it's probability and the genotype * @param type the genotype - * @param likelyhood the likelihood as a float between 0 and 1, which is converted to a byte + * @param lh the likelihood as a double between 0 and 1, which is converted to a byte */ - public void setLikelihood(GENOTYPE type, float likelyhood) { - likelihood.put(type,(int)Math.round(likelyhood*255.0)); + public void setLikelihood(GENOTYPE type, int lh) { + if (lh < 0 || lh > 255) { + throw new IllegalArgumentException("supplied likelihood must be between 0 and 255"); + } + likelihood.put(type,lh); + if (lh < likelihood.get(this.greatest)) { + this.greatest = type; + } } /** - * find the minimum likelihood value stored in the set + * find the minimum likelihood value stored in the set. This represents the most likely genotype, + * since genotypes are represented as negitive log likeihoods * @return */ public int getMinimumValue() { - int minimum = Integer.MAX_VALUE; - for (int i : likelihood.values()) { - if (i < minimum) { minimum = i;} - } - return minimum; + return likelihood.get(this.greatest); } /** diff --git a/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java new file mode 100644 index 000000000..8f2c61030 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/glf/SinglePointCall.java @@ -0,0 +1,94 @@ +package org.broadinstitute.sting.utils.glf; + +import net.sf.samtools.util.BinaryCodec; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * + * @author aaron + * + * Class SinglePointCall + * + * This class represents a single point geneotype call in GLF vernacular + **/ +class SinglePointCall extends GLFRecord { + + // our likelyhood array size + public static final int LIKELYHOOD_SIZE = 10; + + // our record type + private final RECORD_TYPE type = RECORD_TYPE.SINGLE; + + // our array of likelihoods + public final short lk[] = new short[LIKELYHOOD_SIZE]; + + // our size, we're immutable (the size at least). In bytes. + private final int byteSize; + + + SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) { + super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ); + + if (lk.length != LIKELYHOOD_SIZE) { + throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE"); + } + + System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE); + byteSize = 9 + lk.length; + } + + + /** + * Write out the record to a binary codec + * + * @param out + */ + public void write( BinaryCodec out ) { + super.write(out); + for (int x = 0; x < LIKELYHOOD_SIZE; x++) { + out.writeUByte(lk[x]); + } + } + + /** + * return the record type we represent, in this case SINGLE + * @return RECORD_TYPE.SINGLE + */ + public RECORD_TYPE getRecordType() { + return RECORD_TYPE.SINGLE; + } + + /** + * return our size in bytes + * @return number of bytes we represent + */ + public int getByteSize() { + return byteSize; + } + +} diff --git a/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java new file mode 100644 index 000000000..aa8cd207e --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/glf/VariableLengthCall.java @@ -0,0 +1,108 @@ +package org.broadinstitute.sting.utils.glf; + +import net.sf.samtools.util.BinaryCodec; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + *

+ * Class VariableLengthCall + *

+ * This class represents variable length genotype calls in the GLF format. + * Currently a lot of parameters need to be provided, but we may be able to thin + * those down as we understand what we have to specify and what we can infer. + */ +class VariableLengthCall extends GLFRecord { + // our fields, corresponding to the glf spec + private short lkHom1 = 0; + private short lkHom2 = 0; + private short lkHet = 0; + private int indelLen1 = 0; + private int indelLen2 = 0; + private final short indelSeq1[]; + private final short indelSeq2[]; + + // our size, which is immutable, in bytes + private final int size; + + + /** the default constructor */ + VariableLengthCall( char refBase, + long offset, + int readDepth, + short minimumLikelihood, + short rmsMapQ, + short lkHom1, + short lkHom2, + short lkHet, + int indelLen1, + int indelLen2, + final short indelSeq1[], + final short indelSeq2[] ) { + super(refBase, offset, minimumLikelihood, readDepth, rmsMapQ); + this.lkHom1 = lkHom1; + this.lkHom2 = lkHom2; + this.lkHet = lkHet; + this.indelLen1 = indelLen1; + this.indelLen2 = indelLen2; + this.indelSeq1 = indelSeq1; + this.indelSeq2 = indelSeq2; + size = 16 + indelSeq1.length + indelSeq2.length; + + } + + /** + * Write out the record to a binary codec + * + * @param out the binary codec to write to + */ + public void write( BinaryCodec out ) { + super.write(out); + out.writeByte(lkHom1); + out.writeByte(lkHom2); + out.writeByte(lkHet); + out.writeShort(new Integer(indelLen1).shortValue()); + out.writeShort(new Integer(indelLen2).shortValue()); + for (int x = 0; x < indelSeq1.length; x++) { + out.writeUByte(indelSeq1[x]); + } + for (int x = 0; x < indelSeq2.length; x++) { + out.writeUByte(indelSeq2[x]); + } + } + + /** @return RECORD_TYPE.VARIABLE */ + public RECORD_TYPE getRecordType() { + return RECORD_TYPE.VARIABLE; + } + + /** @return */ + public int getByteSize() { + return size; + } +} diff --git a/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java b/java/test/org/broadinstitute/sting/utils/glf/GLFWriterTest.java similarity index 63% rename from java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java rename to java/test/org/broadinstitute/sting/utils/glf/GLFWriterTest.java index b56ab20d3..928a17620 100755 --- a/java/test/org/broadinstitute/sting/utils/glf/GLFRecordTest.java +++ b/java/test/org/broadinstitute/sting/utils/glf/GLFWriterTest.java @@ -2,9 +2,13 @@ package org.broadinstitute.sting.utils.glf; import org.junit.Test; import org.junit.Before; +import org.broadinstitute.sting.BaseTest; import net.sf.samtools.util.BinaryCodec; +import net.sf.samtools.util.BlockCompressedOutputStream; import java.io.File; +import java.io.DataOutputStream; +import java.io.IOException; /* @@ -39,39 +43,57 @@ import java.io.File; *

* Tests for the GLFRecord class */ -public class GLFRecordTest { +public class GLFWriterTest extends BaseTest { /** some made up values that we use to generate the GLF */ - private final String header = "header"; - private final String referenceSequenceName = "refSeq"; + private final String header = ""; + private final String referenceSequenceName = "chr1"; private final int refLength = 1000; + File writeTo = new File("testGLF.glf"); - private GLFRecord rec; + private GLFWriter rec; @Before public void before() { - rec = new GLFRecord(header, referenceSequenceName, refLength); - + } /** * make a fake snp + * * @param genotype the genotype, 0-15 (AA, AT, AA, ... GG) */ - private void addFakeSNP(int genotype, int location) { + private void addFakeSNP( int genotype, int location ) { LikelihoodObject obj = new LikelihoodObject(); - obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype],0.5f); - rec.addSNPCall(location,10,10,obj); + obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype], 128); + int ran = (int) Math.floor(Math.random() * 4.0); + char let = 'A'; + switch (ran) { + case 0: + let = 'T'; + break; + case 1: + let = 'C'; + break; + case 2: + let = 'G'; + break; + } + try { + rec.addPointCall(let, location, 10, (short) 10, obj); + } catch (IllegalArgumentException e) { + e.printStackTrace(); + } } @Test public void basicWrite() { - File writeTo = new File("testGLF.glf"); - BinaryCodec codec = new BinaryCodec(writeTo, true); + rec = new GLFWriter(header, referenceSequenceName, refLength, writeTo); for (int x = 0; x < 100; x++) { - addFakeSNP(0,x); + addFakeSNP((int) Math.round(Math.random() * 9), 1); } + rec.close(); } } diff --git a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java index f223e55c2..34c20c7c0 100755 --- a/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java +++ b/java/test/org/broadinstitute/sting/utils/glf/LikelihoodObjectTest.java @@ -90,7 +90,7 @@ public class LikelihoodObjectTest extends BaseTest { public void testSetLikelihood() { mLO = new LikelihoodObject(); for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) { - mLO.setLikelihood(t,0.5f); + mLO.setLikelihood(t,128); } assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);