iterative changes to GLF files; also a test of checking-in over sshfs.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@850 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-28 20:24:30 +00:00
parent 5e8c08ee63
commit b43deda6c9
4 changed files with 135 additions and 31 deletions

View File

@ -40,14 +40,43 @@ public class GLFRecord {
private String referenceSequenceName = "";
private long referenceSequenceLength = 0;
private int currentOffset = -1;
/**
* The public constructor for creating a GLF object
* @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name
*/
public GLFRecord(String headerText, String referenceSequenceName) {
public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) {
this.headerText = headerText;
this.referenceSequenceName = referenceSequenceName;
this.referenceSequenceLength = referenceSequenceLength;
}
public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) {
if (currentOffset >= genomicLoc) {
throw new IllegalArgumentException("The location supplied is less then a previous location");
}
// make sure the read depth isn't too large
if (read_depth < 0 || read_depth > 0x00FFFFFF) {
throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff");
}
// check that the rmsSquare is greater then 0, and will fit in a uint8
if (rmsMapQ > 0x000000FF || rmsMapQ < 0) {
throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff");
}
if (lhValues == null) {
throw new IllegalArgumentException("likelihood object cannot be null");
}
SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset,
read_depth,
rmsMapQ,
lhValues.toByteArray(),
(short)lhValues.getMinimumValue());
}
/**
@ -84,22 +113,35 @@ interface RecordType {
}
};
/**
* write the record out to a binary codec
* @param out
*/
public void write(BinaryCodec out);
/**
* get the record type
* @return the record type
*/
public RECORD_TYPE getRecordType();
/**
*
* @return
*/
public int getByteSize();
}
// the second record type
class VariableLengthGenotype implements RecordType {
public int offset;
public int min_depth;
public byte rmsMapQ;
public byte lkHom1;
public byte lkHom2;
public byte lkHet;
public short indelLen1;
public short indelLen2;
class VariableLengthCall implements RecordType {
public int offset = 0;
public int min_depth = 0;
public byte rmsMapQ = 0;
public byte lkHom1 = 0;
public byte lkHom2 = 0;
public byte lkHet = 0;
public short indelLen1 = 0;
public short indelLen2 = 0;
public final byte indelSeq1[];
public final byte indelSeq2[];
@ -111,7 +153,7 @@ class VariableLengthGenotype implements RecordType {
*
* @param in the binary codec to get data from
*/
VariableLengthGenotype(BinaryCodec in) {
VariableLengthCall(BinaryCodec in) {
offset = in.readInt();
min_depth = in.readInt();
rmsMapQ = in.readByte();
@ -146,19 +188,25 @@ class VariableLengthGenotype implements RecordType {
public RECORD_TYPE getRecordType() {
return RECORD_TYPE.VARIABLE;
}
/** @return */
public int getByteSize() {
return size;
}
}
// the first record type
class SinglePointGenotype implements RecordType {
class SinglePointCall implements RecordType {
// our likelyhood array size
public static final int LIKELYHOOD_SIZE = 10;
// class fields
public int offset;
public int min_depth;
public byte rmsMapQ;
public final byte lk[] = new byte[LIKELYHOOD_SIZE];
public int offset = 0;
public Long min_depth = 0L;
public int rmsMapQ = 0;
public final short lk[] = new short[LIKELYHOOD_SIZE];
public short minimumLikelihood = 0;
// our size, we're immutable (the size at least)
private final int byteSize; // in bytes
@ -171,13 +219,14 @@ class SinglePointGenotype implements RecordType {
* @param rmsMapQ
* @param lk
*/
SinglePointGenotype(int offset, int min_depth, byte rmsMapQ, byte[] lk) {
SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) {
if (lk.length != LIKELYHOOD_SIZE) {
throw new IllegalArgumentException("SinglePointGenotype: passed in likelyhood array size != LIKELYHOOD_SIZE");
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
}
this.offset = offset;
this.min_depth = min_depth;
this.rmsMapQ = rmsMapQ;
this.minimumLikelihood = minimumLikelihood;
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
byteSize = 9 + lk.length;
}
@ -187,11 +236,13 @@ class SinglePointGenotype implements RecordType {
*
* @param in the binary codec to get data from
*/
SinglePointGenotype(BinaryCodec in) {
SinglePointCall(BinaryCodec in) {
offset = in.readInt();
min_depth = in.readInt();
min_depth = Long.valueOf(in.readInt());
rmsMapQ = in.readByte();
in.readBytes(lk);
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
lk[x] = in.readUByte();
}
byteSize = 9 + lk.length;
}
@ -202,13 +253,20 @@ class SinglePointGenotype implements RecordType {
*/
public void write(BinaryCodec out) {
out.writeInt(offset);
out.writeInt(min_depth);
out.writeInt(min_depth.intValue());
out.writeByte(rmsMapQ);
out.writeBytes(lk);
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
out.writeUByte(lk[x]);
}
}
public RECORD_TYPE getRecordType() {
return RECORD_TYPE.SINGLE;
}
/** @return */
public int getByteSize() {
return byteSize;
}
}

View File

@ -84,14 +84,28 @@ public class LikelihoodObject {
}
/**
* return a byte array representation of the likelihood object, in GLFv3 specified order
* find the minimum likelihood value stored in the set
* @return
*/
public int getMinimumValue() {
int minimum = Integer.MAX_VALUE;
for (int i : likelihood.values()) {
if (i < minimum) { minimum = i;}
}
return minimum;
}
/**
* return a byte array representation of the likelihood object, in GLFv3 specified order.
* The return type is short[] instead of byte[], since signed bytes only store -127 to 127,
* not the 255 range we need.
* @return a byte array of the genotype values
*/
public int[] toByteArray() {
int ret[] = new int[GENOTYPE.values().length];
public short[] toByteArray() {
short ret[] = new short[GENOTYPE.values().length];
int index = 0;
for (GENOTYPE type : GENOTYPE.values()) {
ret[index] = likelihood.get(type);
ret[index] = (short)likelihood.get(type).intValue();
++index;
}
return ret;

View File

@ -1,6 +1,10 @@
package org.broadinstitute.sting.utils.glf;
import org.junit.Test;
import org.junit.Before;
import net.sf.samtools.util.BinaryCodec;
import java.io.File;
/*
@ -37,9 +41,37 @@ import org.junit.Test;
*/
public class GLFRecordTest {
@Test
public void basicWrite() {
/** some made up values that we use to generate the GLF */
private final String header = "header";
private final String referenceSequenceName = "refSeq";
private final int refLength = 1000;
private GLFRecord rec;
@Before
public void before() {
rec = new GLFRecord(header, referenceSequenceName, refLength);
}
/**
* make a fake snp
* @param genotype the genotype, 0-15 (AA, AT, AA, ... GG)
*/
private void addFakeSNP(int genotype, int location) {
LikelihoodObject obj = new LikelihoodObject();
obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype],0.5f);
rec.addSNPCall(location,10,10,obj);
}
@Test
public void basicWrite() {
File writeTo = new File("testGLF.glf");
BinaryCodec codec = new BinaryCodec(writeTo, true);
for (int x = 0; x < 100; x++) {
addFakeSNP(0,x);
}
}
}

View File

@ -80,7 +80,7 @@ public class LikelihoodObjectTest extends BaseTest {
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
int index = 0;
int[] ret = mLO.toByteArray();
short[] ret = mLO.toByteArray();
for (index = 0; index < ret.length; index++) {
assertTrue(ray[index] == ret[index]);
}
@ -95,7 +95,7 @@ public class LikelihoodObjectTest extends BaseTest {
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
int index = 0;
int[] ret = mLO.toByteArray();
short[] ret = mLO.toByteArray();
for (index = 0; index < ret.length; index++) {
assertTrue(ret[index] == 128);
}