iterative changes to GLF files; also a test of checking-in over sshfs.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@850 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-05-28 20:24:30 +00:00
parent 5e8c08ee63
commit b43deda6c9
4 changed files with 135 additions and 31 deletions

View File

@ -40,14 +40,43 @@ public class GLFRecord {
private String referenceSequenceName = ""; private String referenceSequenceName = "";
private long referenceSequenceLength = 0; private long referenceSequenceLength = 0;
private int currentOffset = -1;
/** /**
* The public constructor for creating a GLF object * The public constructor for creating a GLF object
* @param headerText the header text (currently unclear what the contents are) * @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name * @param referenceSequenceName the reference sequence name
*/ */
public GLFRecord(String headerText, String referenceSequenceName) { public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) {
this.headerText = headerText; this.headerText = headerText;
this.referenceSequenceName = referenceSequenceName; this.referenceSequenceName = referenceSequenceName;
this.referenceSequenceLength = referenceSequenceLength;
}
public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) {
if (currentOffset >= genomicLoc) {
throw new IllegalArgumentException("The location supplied is less then a previous location");
}
// make sure the read depth isn't too large
if (read_depth < 0 || read_depth > 0x00FFFFFF) {
throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff");
}
// check that the rmsSquare is greater then 0, and will fit in a uint8
if (rmsMapQ > 0x000000FF || rmsMapQ < 0) {
throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff");
}
if (lhValues == null) {
throw new IllegalArgumentException("likelihood object cannot be null");
}
SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset,
read_depth,
rmsMapQ,
lhValues.toByteArray(),
(short)lhValues.getMinimumValue());
} }
/** /**
@ -84,22 +113,35 @@ interface RecordType {
} }
}; };
/**
* write the record out to a binary codec
* @param out the binary codec to write the record to
*/
public void write(BinaryCodec out); public void write(BinaryCodec out);
/**
* get the record type
* @return the record type
*/
public RECORD_TYPE getRecordType(); public RECORD_TYPE getRecordType();
/**
* get the size of the record, in bytes
* @return the record size, in bytes
*/
public int getByteSize();
} }
// the second record type // the second record type
class VariableLengthGenotype implements RecordType { class VariableLengthCall implements RecordType {
public int offset; public int offset = 0;
public int min_depth; public int min_depth = 0;
public byte rmsMapQ; public byte rmsMapQ = 0;
public byte lkHom1; public byte lkHom1 = 0;
public byte lkHom2; public byte lkHom2 = 0;
public byte lkHet; public byte lkHet = 0;
public short indelLen1; public short indelLen1 = 0;
public short indelLen2; public short indelLen2 = 0;
public final byte indelSeq1[]; public final byte indelSeq1[];
public final byte indelSeq2[]; public final byte indelSeq2[];
@ -111,7 +153,7 @@ class VariableLengthGenotype implements RecordType {
* *
* @param in the binary codec to get data from * @param in the binary codec to get data from
*/ */
VariableLengthGenotype(BinaryCodec in) { VariableLengthCall(BinaryCodec in) {
offset = in.readInt(); offset = in.readInt();
min_depth = in.readInt(); min_depth = in.readInt();
rmsMapQ = in.readByte(); rmsMapQ = in.readByte();
@ -146,19 +188,25 @@ class VariableLengthGenotype implements RecordType {
public RECORD_TYPE getRecordType() { public RECORD_TYPE getRecordType() {
return RECORD_TYPE.VARIABLE; return RECORD_TYPE.VARIABLE;
} }
/**
 * report how large this record is
 *
 * @return the value of the size field (presumably in bytes, going by the method name -- confirm)
 */
public int getByteSize() {
    return this.size;
}
} }
// the first record type // the first record type
class SinglePointGenotype implements RecordType { class SinglePointCall implements RecordType {
// our likelyhood array size // our likelyhood array size
public static final int LIKELYHOOD_SIZE = 10; public static final int LIKELYHOOD_SIZE = 10;
// class fields // class fields
public int offset; public int offset = 0;
public int min_depth; public Long min_depth = 0L;
public byte rmsMapQ; public int rmsMapQ = 0;
public final byte lk[] = new byte[LIKELYHOOD_SIZE]; public final short lk[] = new short[LIKELYHOOD_SIZE];
public short minimumLikelihood = 0;
// our size, we're immutable (the size at least) // our size, we're immutable (the size at least)
private final int byteSize; // in bytes private final int byteSize; // in bytes
@ -171,13 +219,14 @@ class SinglePointGenotype implements RecordType {
* @param rmsMapQ * @param rmsMapQ
* @param lk * @param lk
*/ */
SinglePointGenotype(int offset, int min_depth, byte rmsMapQ, byte[] lk) { SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) {
if (lk.length != LIKELYHOOD_SIZE) { if (lk.length != LIKELYHOOD_SIZE) {
throw new IllegalArgumentException("SinglePointGenotype: passed in likelyhood array size != LIKELYHOOD_SIZE"); throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
} }
this.offset = offset; this.offset = offset;
this.min_depth = min_depth; this.min_depth = min_depth;
this.rmsMapQ = rmsMapQ; this.rmsMapQ = rmsMapQ;
this.minimumLikelihood = minimumLikelihood;
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE); System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
byteSize = 9 + lk.length; byteSize = 9 + lk.length;
} }
@ -187,11 +236,13 @@ class SinglePointGenotype implements RecordType {
* *
* @param in the binary codec to get data from * @param in the binary codec to get data from
*/ */
SinglePointGenotype(BinaryCodec in) { SinglePointCall(BinaryCodec in) {
offset = in.readInt(); offset = in.readInt();
min_depth = in.readInt(); min_depth = Long.valueOf(in.readInt());
rmsMapQ = in.readByte(); rmsMapQ = in.readByte();
in.readBytes(lk); for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
lk[x] = in.readUByte();
}
byteSize = 9 + lk.length; byteSize = 9 + lk.length;
} }
@ -202,13 +253,20 @@ class SinglePointGenotype implements RecordType {
*/ */
public void write(BinaryCodec out) { public void write(BinaryCodec out) {
out.writeInt(offset); out.writeInt(offset);
out.writeInt(min_depth); out.writeInt(min_depth.intValue());
out.writeByte(rmsMapQ); out.writeByte(rmsMapQ);
out.writeBytes(lk); for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
out.writeUByte(lk[x]);
}
} }
public RECORD_TYPE getRecordType() { public RECORD_TYPE getRecordType() {
return RECORD_TYPE.SINGLE; return RECORD_TYPE.SINGLE;
} }
/**
 * report how large this record is
 *
 * @return the record size, in bytes
 */
public int getByteSize() {
    return this.byteSize;
}
} }

View File

@ -84,14 +84,28 @@ public class LikelihoodObject {
} }
/** /**
* return a byte array representation of the likelihood object, in GLFv3 specified order * find the minimum likelihood value stored in the set
* @return
*/
public int getMinimumValue() {
    // start from the largest int so any stored value replaces it;
    // an empty likelihood set therefore yields Integer.MAX_VALUE
    int smallest = Integer.MAX_VALUE;
    for (int candidate : likelihood.values()) {
        smallest = Math.min(smallest, candidate);
    }
    return smallest;
}
/**
* return a byte array representation of the likelihood object, in GLFv3 specified order.
* The return type is short[] instead of byte[], since signed bytes only store -128 to 127,
* not the 0 to 255 range we need.
* @return a byte array of the genotype values * @return a byte array of the genotype values
*/ */
public int[] toByteArray() { public short[] toByteArray() {
int ret[] = new int[GENOTYPE.values().length]; short ret[] = new short[GENOTYPE.values().length];
int index = 0; int index = 0;
for (GENOTYPE type : GENOTYPE.values()) { for (GENOTYPE type : GENOTYPE.values()) {
ret[index] = likelihood.get(type); ret[index] = (short)likelihood.get(type).intValue();
++index; ++index;
} }
return ret; return ret;

View File

@ -1,6 +1,10 @@
package org.broadinstitute.sting.utils.glf; package org.broadinstitute.sting.utils.glf;
import org.junit.Test; import org.junit.Test;
import org.junit.Before;
import net.sf.samtools.util.BinaryCodec;
import java.io.File;
/* /*
@ -37,9 +41,37 @@ import org.junit.Test;
*/ */
public class GLFRecordTest { public class GLFRecordTest {
/** some made up values that we use to generate the GLF */
private final String header = "header";
private final String referenceSequenceName = "refSeq";
private final int refLength = 1000;
private GLFRecord rec;
/** build a fresh GLFRecord from the made-up values before each test */
@Before
public void before() {
    this.rec = new GLFRecord(this.header, this.referenceSequenceName, this.refLength);
}
/**
 * add a made-up SNP call to the record under test
 * @param genotype index into LikelihoodObject.GENOTYPE.values() selecting the genotype whose likelihood is set
 * @param location the genomic location handed to GLFRecord.addSNPCall
 */
private void addFakeSNP(int genotype, int location) {
    LikelihoodObject likelihoods = new LikelihoodObject();
    LikelihoodObject.GENOTYPE chosen = LikelihoodObject.GENOTYPE.values()[genotype];
    likelihoods.setLikelihood(chosen, 0.5f);
    // read depth and rms mapping quality are both fixed at 10 for the fake call
    rec.addSNPCall(location, 10, 10, likelihoods);
}
@Test @Test
public void basicWrite() { public void basicWrite() {
File writeTo = new File("testGLF.glf");
BinaryCodec codec = new BinaryCodec(writeTo, true);
for (int x = 0; x < 100; x++) {
addFakeSNP(0,x);
}
} }
} }

View File

@ -80,7 +80,7 @@ public class LikelihoodObjectTest extends BaseTest {
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
int index = 0; int index = 0;
int[] ret = mLO.toByteArray(); short[] ret = mLO.toByteArray();
for (index = 0; index < ret.length; index++) { for (index = 0; index < ret.length; index++) {
assertTrue(ray[index] == ret[index]); assertTrue(ray[index] == ret[index]);
} }
@ -95,7 +95,7 @@ public class LikelihoodObjectTest extends BaseTest {
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
int index = 0; int index = 0;
int[] ret = mLO.toByteArray(); short[] ret = mLO.toByteArray();
for (index = 0; index < ret.length; index++) { for (index = 0; index < ret.length; index++) {
assertTrue(ret[index] == 128); assertTrue(ret[index] == 128);
} }