GLF writing support

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@872 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-01 21:30:18 +00:00
parent 417f5b145e
commit e712d69382
7 changed files with 600 additions and 263 deletions

View File

@ -2,271 +2,207 @@ package org.broadinstitute.sting.utils.glf;
import net.sf.samtools.util.BinaryCodec;
import java.util.ArrayList;
import java.util.List;
/**
/*
* Copyright (c) 2009 The Broad Institute
*
* User: aaron
* Date: May 13, 2009
* Time: 3:36:18 PM
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
* @version 1.0
* @date May 13, 2009
* <p/>
* Class GLFRecord
* <p/>
* The base record that's stored in the GLF format.
* <p/>
* Class RecordType
* <p/>
* The base record type for all GLF entries. Each record has a number of fields
* common to the record set. This is also the source of the REF_BASE enumeration,
* which represents the accepted FASTA nucleotide symbols and their assocated GLF
* field values.
*/
public class GLFRecord {
// our record list
private final List<RecordType> records = new ArrayList<RecordType>();
abstract class GLFRecord {
public static final byte[] glfMagic = {'G','L','F','\3'};
private String headerText = "";
private String referenceSequenceName = "";
private long referenceSequenceLength = 0;
// fields common to all records
protected REF_BASE refBase;
protected long offset = 1;
protected short minimumLikelihood = 0;
protected int readDepth = 0;
protected short rmsMapQ = 0;
private int currentOffset = -1;
/**
* The public constructor for creating a GLF object
* @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name
*/
public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) {
this.headerText = headerText;
this.referenceSequenceName = referenceSequenceName;
this.referenceSequenceLength = referenceSequenceLength;
}
// the size of this base structure
protected final int baseSize = 10;
public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) {
if (currentOffset >= genomicLoc) {
throw new IllegalArgumentException("The location supplied is less then a previous location");
}
/** the reference base enumeration, with their short (type) values for GLF */
public enum REF_BASE {
X((short) 0x00),
A((short) 0x01),
C((short) 0x02),
M((short) 0x03),
G((short) 0x04),
R((short) 0x05),
S((short) 0x06),
V((short) 0x07),
T((short) 0x08),
W((short) 0x09),
Y((short) 0x0A),
H((short) 0x0B),
K((short) 0x0C),
D((short) 0x0D),
B((short) 0x0E),
N((short) 0x0F);
// make sure the read depth isn't too large
if (read_depth < 0 || read_depth > 0x00FFFFFF) {
throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff");
}
// check that the rmsSquare is greater then 0, and will fit in a uint8
if (rmsMapQ > 0x000000FF || rmsMapQ < 0) {
throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff");
}
if (lhValues == null) {
throw new IllegalArgumentException("likelihood object cannot be null");
}
SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset,
read_depth,
rmsMapQ,
lhValues.toByteArray(),
(short)lhValues.getMinimumValue());
}
/**
* Write out the record to a binary codec object
*
* @param out
*/
public void write(BinaryCodec out) {
out.writeBytes(glfMagic);
out.writeString(headerText,true,true);
out.writeString(referenceSequenceName,true,true);
out.writeUInt(referenceSequenceLength);
for (RecordType rec: records) {
out.writeUByte(rec.getRecordType().getFieldValue());
rec.write(out);
}
out.writeByte((byte)0);
}
}
interface RecordType {
enum RECORD_TYPE {
VARIABLE((short)2),
SINGLE((short)1);
private final short fieldValue; // in kilograms
RECORD_TYPE(short value) {
/**
* private constructor, used by the enum class to makes each enum value
* @param value the short values specified in the enum listing
*/
REF_BASE( short value ) {
fieldValue = value;
}
public short getFieldValue() {
/**
* static method from returning a REF_BASE given the character representation
*
* @param value the character representation of a REF_BASE
*
* @return the corresponding REF_BASE
* @throws IllegalArgumentException if the value passed can't be converted
*/
public static REF_BASE toBase( char value ) {
String str = String.valueOf(value).toUpperCase();
for (int x = 0; x < REF_BASE.values().length; x++) {
if (REF_BASE.values()[x].toString().equals(str)) {
return REF_BASE.values()[x];
}
}
throw new IllegalArgumentException("Counldn't find matching reference base for " + str);
}
/** @return the hex value of the given REF_BASE */
public short getBaseHexValue() {
return fieldValue;
}
};
}
/** the record type enum, which enumerates the different records we can have in a GLF */
enum RECORD_TYPE {
SINGLE((short) 1),
VARIABLE((short) 2);
private final short fieldValue; // in kilograms
RECORD_TYPE( short value ) {
fieldValue = value;
}
public short getReadTypeValue() {
return fieldValue;
}
}
/**
* write the record out to a binary codec
* @param out
* Constructor, given the base a character reference base
*
* @param base the reference base in the reference
* @param offset the offset from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
public void write(BinaryCodec out);
public GLFRecord( char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
REF_BASE newBase = REF_BASE.toBase(base);
validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ);
}
/**
* Constructor, given the base a REF_BASE
*
* @param base the reference base in the reference
* @param offset the offset from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
GLFRecord( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ);
}
/**
* validate the input during construction, and store valid values
*
* @param base the reference base in the reference, as a REF_BASE
* @param offset the offset from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
private void validateInput( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
this.refBase = base;
if (offset > 4294967295L || offset < 0) {
throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset);
}
this.offset = offset;
if (minimumLikelihood > 255 || minimumLikelihood < 0) {
throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
}
this.minimumLikelihood = minimumLikelihood;
if (readDepth > 16777215 || readDepth < 0) {
throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth);
}
this.readDepth = readDepth;
if (rmsMapQ > 255 || rmsMapQ < 0) {
throw new IllegalArgumentException("rmsMapQ is out of bounds (0 to 0xff) value passed = " + rmsMapQ);
}
this.rmsMapQ = rmsMapQ;
}
/**
* write the this record to a binary codec output.
*
* @param out the binary codec to write to
*/
public void write( BinaryCodec out ) {
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
out.writeUInt(( (Long) offset ).intValue());
out.writeUInt((new Long(readDepth).intValue()));
out.writeUByte((short) rmsMapQ);
}
/**
* get the record type
* @return the record type
*
* @return the record type enumeration
*/
public RECORD_TYPE getRecordType();
public abstract RECORD_TYPE getRecordType();
/**
*
* @return
* Return the size of this record in bytes.
*
* @return the size of this record type, in bytes
*/
public int getByteSize();
public abstract int getByteSize();
}
// the second record type
class VariableLengthCall implements RecordType {
public int offset = 0;
public int min_depth = 0;
public byte rmsMapQ = 0;
public byte lkHom1 = 0;
public byte lkHom2 = 0;
public byte lkHet = 0;
public short indelLen1 = 0;
public short indelLen2 = 0;
public final byte indelSeq1[];
public final byte indelSeq2[];
// our size, we're immutable (at least the size is)
private final int size; // in bytes
/**
* the BinaryCodec constructor
*
* @param in the binary codec to get data from
*/
VariableLengthCall(BinaryCodec in) {
offset = in.readInt();
min_depth = in.readInt();
rmsMapQ = in.readByte();
lkHom1 = in.readByte();
lkHom2 = in.readByte();
lkHet = in.readByte();
indelLen1 = in.readShort();
indelLen2 = in.readShort();
indelSeq1 = new byte[indelLen1];
indelSeq2 = new byte[indelLen2];
this.size = 16 + indelLen1 + indelLen2;
}
/**
* Write out the record to a binary codec
*
* @param out
*/
public void write(BinaryCodec out) {
out.writeInt(offset);
out.writeInt(min_depth);
out.writeByte(rmsMapQ);
out.writeByte(lkHom1);
out.writeByte(lkHom2);
out.writeByte(lkHet);
out.writeShort(indelLen1);
out.writeShort(indelLen2);
out.writeBytes(indelSeq1);
out.writeBytes(indelSeq2);
}
public RECORD_TYPE getRecordType() {
return RECORD_TYPE.VARIABLE;
}
/** @return */
public int getByteSize() {
return size;
}
}
// the first record type
class SinglePointCall implements RecordType {
// our likelyhood array size
public static final int LIKELYHOOD_SIZE = 10;
// class fields
public int offset = 0;
public Long min_depth = 0L;
public int rmsMapQ = 0;
public final short lk[] = new short[LIKELYHOOD_SIZE];
public short minimumLikelihood = 0;
// our size, we're immutable (the size at least)
private final int byteSize; // in bytes
/**
* The parameter constructor
*
* @param offset
* @param min_depth
* @param rmsMapQ
* @param lk
*/
SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) {
if (lk.length != LIKELYHOOD_SIZE) {
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
}
this.offset = offset;
this.min_depth = min_depth;
this.rmsMapQ = rmsMapQ;
this.minimumLikelihood = minimumLikelihood;
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
byteSize = 9 + lk.length;
}
/**
* the BinaryCodec constructor
*
* @param in the binary codec to get data from
*/
SinglePointCall(BinaryCodec in) {
offset = in.readInt();
min_depth = Long.valueOf(in.readInt());
rmsMapQ = in.readByte();
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
lk[x] = in.readUByte();
}
byteSize = 9 + lk.length;
}
/**
* Write out the record to a binary codec
*
* @param out
*/
public void write(BinaryCodec out) {
out.writeInt(offset);
out.writeInt(min_depth.intValue());
out.writeByte(rmsMapQ);
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
out.writeUByte(lk[x]);
}
}
public RECORD_TYPE getRecordType() {
return RECORD_TYPE.SINGLE;
}
/** @return */
public int getByteSize() {
return byteSize;
}
}

View File

@ -0,0 +1,170 @@
package org.broadinstitute.sting.utils.glf;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import java.util.ArrayList;
import java.util.List;
import java.io.File;
import java.io.DataOutputStream;
/**
*
* User: aaron
* Date: May 13, 2009
* Time: 3:36:18 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
*/
public class GLFWriter {
// our output codec
private final BinaryCodec outputBinaryCodec;
public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
private String headerText = "";
private String referenceSequenceName = "";
private long referenceSequenceLength = 0;
/**
* The public constructor for creating a GLF object
*
* @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name
*/
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
this.headerText = headerText;
this.referenceSequenceName = referenceSequenceName;
this.referenceSequenceLength = referenceSequenceLength;
outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo)));
outputBinaryCodec.setOutputFileName(writeTo.toString());
this.writeHeader();
}
/**
* add a point genotype to the GLF writer
*
* @param refBase the reference base, as a char
* @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
*/
public void addPointCall( char refBase,
int genomicLoc,
int readDepth,
short rmsMapQ,
LikelihoodObject lhValues ) {
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
readDepth,
rmsMapQ,
lhValues.toByteArray(),
lhValues);
call.write(this.outputBinaryCodec);
}
/**
* add a variable length (indel, deletion, etc) to the genotype writer
*
* @param refBase the reference base
* @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param minimumLikelihood the minimum likelihood value
* @param homozygProb1 the probability of the first homozygous indel allele
* @param homozygProb2 the probability of the second homozygous indel allele
* @param heterozygProb the probability of the heterozygote
* @param indelLength1 the length of the first indel allele
* @param indelLength2 the length of the second indel allele
* @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele
*/
public void addVarLengthCall( char refBase,
long genomicLoc,
int readDepth,
short rmsMapQ,
short minimumLikelihood,
short homozygProb1,
short homozygProb2,
short heterozygProb,
int indelLength1,
int indelLength2,
char[] indelSeq1,
char[] indelSeq2 ) {
short[] indexSeqEq1 = new short[indelSeq1.length];
short[] indexSeqEq2 = new short[indelSeq2.length];
for (int x = 0; x < indelSeq1.length; x++) {
indexSeqEq1[x] = new Integer(0x00ff & indelSeq1[x]).shortValue();
}
for (int x = 0; x < indelSeq2.length; x++) {
indexSeqEq2[x] = new Integer(0x00ff & indelSeq2[x]).shortValue();
}
VariableLengthCall call = new VariableLengthCall(refBase,
genomicLoc,
readDepth,
minimumLikelihood,
rmsMapQ,
homozygProb1,
homozygProb2,
heterozygProb,
indelLength1,
indelLength2,
indexSeqEq1,
indexSeqEq2);
call.write(this.outputBinaryCodec);
}
/**
* add a GLF record to the output file
* @param rec the GLF record to write.
*/
public void addGLFRecord( GLFRecord rec ) {
rec.write(this.outputBinaryCodec);
}
/**
* Write out the header information for the GLF file
*
*/
private void writeHeader() {
for (int x = 0; x < glfMagic.length; x++) {
outputBinaryCodec.writeByte(glfMagic[x]);
}
if (!(headerText.equals(""))) {
outputBinaryCodec.writeString(headerText, true, true);
} else {
outputBinaryCodec.writeInt(0);
}
outputBinaryCodec.writeString(referenceSequenceName, true, true);
outputBinaryCodec.writeUInt(referenceSequenceLength);
}
/**
* close the file
*
*/
public void close() {
outputBinaryCodec.writeByte((byte) 0);
outputBinaryCodec.close();
}
}

View File

@ -46,19 +46,23 @@ public class LikelihoodObject {
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
}
// the associated probabilities for each genotype
// the associated negitive log likelihoods for each genotype
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
/** create a blank likelihood object */
public LikelihoodObject() {
for (GENOTYPE type : GENOTYPE.values()) {
likelihood.put(type, 0);
likelihood.put(type, 255);
}
}
// since the default case it to start with all infinity, we can choose any random base
GENOTYPE greatest = GENOTYPE.AA;
/**
* create a likelyhood object, given an array of genotype scores in GLFv3 ordering
* @param values
* @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
*/
public LikelihoodObject(int[] values) {
if (values.length != GENOTYPE.values().length) {
@ -77,22 +81,25 @@ public class LikelihoodObject {
/**
* set the likelihood, given it's probability and the genotype
* @param type the genotype
* @param likelyhood the likelihood as a float between 0 and 1, which is converted to a byte
* @param lh the likelihood as a double between 0 and 1, which is converted to a byte
*/
public void setLikelihood(GENOTYPE type, float likelyhood) {
likelihood.put(type,(int)Math.round(likelyhood*255.0));
public void setLikelihood(GENOTYPE type, int lh) {
if (lh < 0 || lh > 255) {
throw new IllegalArgumentException("supplied likelihood must be between 0 and 255");
}
likelihood.put(type,lh);
if (lh < likelihood.get(this.greatest)) {
this.greatest = type;
}
}
/**
* find the minimum likelihood value stored in the set
* find the minimum likelihood value stored in the set. This represents the most likely genotype,
* since genotypes are represented as negitive log likeihoods
* @return
*/
public int getMinimumValue() {
int minimum = Integer.MAX_VALUE;
for (int i : likelihood.values()) {
if (i < minimum) { minimum = i;}
}
return minimum;
return likelihood.get(this.greatest);
}
/**

View File

@ -0,0 +1,94 @@
package org.broadinstitute.sting.utils.glf;
import net.sf.samtools.util.BinaryCodec;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
*
* @author aaron
*
* Class SinglePointCall
*
* This class represents a single point geneotype call in GLF vernacular
**/
class SinglePointCall extends GLFRecord {
// our likelyhood array size
public static final int LIKELYHOOD_SIZE = 10;
// our record type
private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
// our array of likelihoods
public final short lk[] = new short[LIKELYHOOD_SIZE];
// our size, we're immutable (the size at least). In bytes.
private final int byteSize;
SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) {
super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ);
if (lk.length != LIKELYHOOD_SIZE) {
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
}
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
byteSize = 9 + lk.length;
}
/**
* Write out the record to a binary codec
*
* @param out
*/
public void write( BinaryCodec out ) {
super.write(out);
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
out.writeUByte(lk[x]);
}
}
/**
* return the record type we represent, in this case SINGLE
* @return RECORD_TYPE.SINGLE
*/
public RECORD_TYPE getRecordType() {
return RECORD_TYPE.SINGLE;
}
/**
* return our size in bytes
* @return number of bytes we represent
*/
public int getByteSize() {
return byteSize;
}
}

View File

@ -0,0 +1,108 @@
package org.broadinstitute.sting.utils.glf;
import net.sf.samtools.util.BinaryCodec;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
* <p/>
* Class VariableLengthCall
* <p/>
* This class represents variable length genotype calls in the GLF format.
* Currently a lot of parameters need to be provided, but we may be able to thin
* those down as we understand what we have to specify and what we can infer.
*/
class VariableLengthCall extends GLFRecord {
// our fields, corresponding to the glf spec
private short lkHom1 = 0;
private short lkHom2 = 0;
private short lkHet = 0;
private int indelLen1 = 0;
private int indelLen2 = 0;
private final short indelSeq1[];
private final short indelSeq2[];
// our size, which is immutable, in bytes
private final int size;
/** the default constructor */
VariableLengthCall( char refBase,
long offset,
int readDepth,
short minimumLikelihood,
short rmsMapQ,
short lkHom1,
short lkHom2,
short lkHet,
int indelLen1,
int indelLen2,
final short indelSeq1[],
final short indelSeq2[] ) {
super(refBase, offset, minimumLikelihood, readDepth, rmsMapQ);
this.lkHom1 = lkHom1;
this.lkHom2 = lkHom2;
this.lkHet = lkHet;
this.indelLen1 = indelLen1;
this.indelLen2 = indelLen2;
this.indelSeq1 = indelSeq1;
this.indelSeq2 = indelSeq2;
size = 16 + indelSeq1.length + indelSeq2.length;
}
/**
* Write out the record to a binary codec
*
* @param out the binary codec to write to
*/
public void write( BinaryCodec out ) {
super.write(out);
out.writeByte(lkHom1);
out.writeByte(lkHom2);
out.writeByte(lkHet);
out.writeShort(new Integer(indelLen1).shortValue());
out.writeShort(new Integer(indelLen2).shortValue());
for (int x = 0; x < indelSeq1.length; x++) {
out.writeUByte(indelSeq1[x]);
}
for (int x = 0; x < indelSeq2.length; x++) {
out.writeUByte(indelSeq2[x]);
}
}
/** @return RECORD_TYPE.VARIABLE */
public RECORD_TYPE getRecordType() {
return RECORD_TYPE.VARIABLE;
}
/** @return */
public int getByteSize() {
return size;
}
}

View File

@ -2,9 +2,13 @@ package org.broadinstitute.sting.utils.glf;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import java.io.File;
import java.io.DataOutputStream;
import java.io.IOException;
/*
@ -39,39 +43,57 @@ import java.io.File;
* <p/>
* Tests for the GLFRecord class
*/
public class GLFRecordTest {
public class GLFWriterTest extends BaseTest {
/** some made up values that we use to generate the GLF */
private final String header = "header";
private final String referenceSequenceName = "refSeq";
private final String header = "";
private final String referenceSequenceName = "chr1";
private final int refLength = 1000;
File writeTo = new File("testGLF.glf");
private GLFRecord rec;
private GLFWriter rec;
@Before
public void before() {
rec = new GLFRecord(header, referenceSequenceName, refLength);
}
/**
* make a fake snp
*
* @param genotype the genotype, 0-15 (AA, AT, AA, ... GG)
*/
private void addFakeSNP(int genotype, int location) {
private void addFakeSNP( int genotype, int location ) {
LikelihoodObject obj = new LikelihoodObject();
obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype],0.5f);
rec.addSNPCall(location,10,10,obj);
obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype], 128);
int ran = (int) Math.floor(Math.random() * 4.0);
char let = 'A';
switch (ran) {
case 0:
let = 'T';
break;
case 1:
let = 'C';
break;
case 2:
let = 'G';
break;
}
try {
rec.addPointCall(let, location, 10, (short) 10, obj);
} catch (IllegalArgumentException e) {
e.printStackTrace();
}
}
@Test
public void basicWrite() {
File writeTo = new File("testGLF.glf");
BinaryCodec codec = new BinaryCodec(writeTo, true);
rec = new GLFWriter(header, referenceSequenceName, refLength, writeTo);
for (int x = 0; x < 100; x++) {
addFakeSNP(0,x);
addFakeSNP((int) Math.round(Math.random() * 9), 1);
}
rec.close();
}
}

View File

@ -90,7 +90,7 @@ public class LikelihoodObjectTest extends BaseTest {
public void testSetLikelihood() {
mLO = new LikelihoodObject();
for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) {
mLO.setLikelihood(t,0.5f);
mLO.setLikelihood(t,128);
}
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);