iterative changes to GLF files; also a test of checking-in over sshfs.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@850 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5e8c08ee63
commit
b43deda6c9
|
|
@ -40,14 +40,43 @@ public class GLFRecord {
|
||||||
private String referenceSequenceName = "";
|
private String referenceSequenceName = "";
|
||||||
private long referenceSequenceLength = 0;
|
private long referenceSequenceLength = 0;
|
||||||
|
|
||||||
|
private int currentOffset = -1;
|
||||||
/**
|
/**
|
||||||
* The public constructor for creating a GLF object
|
* The public constructor for creating a GLF object
|
||||||
* @param headerText the header text (currently unclear what the contents are)
|
* @param headerText the header text (currently unclear what the contents are)
|
||||||
* @param referenceSequenceName the reference sequence name
|
* @param referenceSequenceName the reference sequence name
|
||||||
*/
|
*/
|
||||||
public GLFRecord(String headerText, String referenceSequenceName) {
|
public GLFRecord(String headerText, String referenceSequenceName, int referenceSequenceLength) {
|
||||||
this.headerText = headerText;
|
this.headerText = headerText;
|
||||||
this.referenceSequenceName = referenceSequenceName;
|
this.referenceSequenceName = referenceSequenceName;
|
||||||
|
this.referenceSequenceLength = referenceSequenceLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSNPCall(int genomicLoc, long read_depth, int rmsMapQ, LikelihoodObject lhValues) {
|
||||||
|
if (currentOffset >= genomicLoc) {
|
||||||
|
throw new IllegalArgumentException("The location supplied is less then a previous location");
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure the read depth isn't too large
|
||||||
|
if (read_depth < 0 || read_depth > 0x00FFFFFF) {
|
||||||
|
throw new IllegalArgumentException("The read depth is too large; must lie in the range 0 to 0x00ffffff");
|
||||||
|
}
|
||||||
|
|
||||||
|
// check that the rms mapping quality (rmsMapQ) is not negative and will fit in a uint8
|
||||||
|
if (rmsMapQ > 0x000000FF || rmsMapQ < 0) {
|
||||||
|
throw new IllegalArgumentException("rms of mapping quality is too large; must lie in the range 0 to 0x000000ff");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lhValues == null) {
|
||||||
|
throw new IllegalArgumentException("likelihood object cannot be null");
|
||||||
|
}
|
||||||
|
|
||||||
|
SinglePointCall call = new SinglePointCall(genomicLoc - currentOffset,
|
||||||
|
read_depth,
|
||||||
|
rmsMapQ,
|
||||||
|
lhValues.toByteArray(),
|
||||||
|
(short)lhValues.getMinimumValue());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -84,22 +113,35 @@ interface RecordType {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* write the record out to a binary codec
|
||||||
|
* @param out the binary codec to write the record to
|
||||||
|
*/
|
||||||
public void write(BinaryCodec out);
|
public void write(BinaryCodec out);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the record type
|
||||||
|
* @return the record type
|
||||||
|
*/
|
||||||
public RECORD_TYPE getRecordType();
|
public RECORD_TYPE getRecordType();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the size of the record
|
||||||
|
* @return the size of the record, in bytes
|
||||||
|
*/
|
||||||
|
public int getByteSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
// the second record type
|
// the second record type
|
||||||
class VariableLengthGenotype implements RecordType {
|
class VariableLengthCall implements RecordType {
|
||||||
public int offset;
|
public int offset = 0;
|
||||||
public int min_depth;
|
public int min_depth = 0;
|
||||||
public byte rmsMapQ;
|
public byte rmsMapQ = 0;
|
||||||
public byte lkHom1;
|
public byte lkHom1 = 0;
|
||||||
public byte lkHom2;
|
public byte lkHom2 = 0;
|
||||||
public byte lkHet;
|
public byte lkHet = 0;
|
||||||
public short indelLen1;
|
public short indelLen1 = 0;
|
||||||
public short indelLen2;
|
public short indelLen2 = 0;
|
||||||
public final byte indelSeq1[];
|
public final byte indelSeq1[];
|
||||||
public final byte indelSeq2[];
|
public final byte indelSeq2[];
|
||||||
|
|
||||||
|
|
@ -111,7 +153,7 @@ class VariableLengthGenotype implements RecordType {
|
||||||
*
|
*
|
||||||
* @param in the binary codec to get data from
|
* @param in the binary codec to get data from
|
||||||
*/
|
*/
|
||||||
VariableLengthGenotype(BinaryCodec in) {
|
VariableLengthCall(BinaryCodec in) {
|
||||||
offset = in.readInt();
|
offset = in.readInt();
|
||||||
min_depth = in.readInt();
|
min_depth = in.readInt();
|
||||||
rmsMapQ = in.readByte();
|
rmsMapQ = in.readByte();
|
||||||
|
|
@ -146,19 +188,25 @@ class VariableLengthGenotype implements RecordType {
|
||||||
public RECORD_TYPE getRecordType() {
|
public RECORD_TYPE getRecordType() {
|
||||||
return RECORD_TYPE.VARIABLE;
|
return RECORD_TYPE.VARIABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return */
|
||||||
|
public int getByteSize() {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// the first record type
|
// the first record type
|
||||||
class SinglePointGenotype implements RecordType {
|
class SinglePointCall implements RecordType {
|
||||||
// our likelyhood array size
|
// our likelyhood array size
|
||||||
public static final int LIKELYHOOD_SIZE = 10;
|
public static final int LIKELYHOOD_SIZE = 10;
|
||||||
|
|
||||||
// class fields
|
// class fields
|
||||||
public int offset;
|
public int offset = 0;
|
||||||
public int min_depth;
|
public Long min_depth = 0L;
|
||||||
public byte rmsMapQ;
|
public int rmsMapQ = 0;
|
||||||
public final byte lk[] = new byte[LIKELYHOOD_SIZE];
|
public final short lk[] = new short[LIKELYHOOD_SIZE];
|
||||||
|
public short minimumLikelihood = 0;
|
||||||
|
|
||||||
// our size, we're immutable (the size at least)
|
// our size, we're immutable (the size at least)
|
||||||
private final int byteSize; // in bytes
|
private final int byteSize; // in bytes
|
||||||
|
|
@ -171,13 +219,14 @@ class SinglePointGenotype implements RecordType {
|
||||||
* @param rmsMapQ
|
* @param rmsMapQ
|
||||||
* @param lk
|
* @param lk
|
||||||
*/
|
*/
|
||||||
SinglePointGenotype(int offset, int min_depth, byte rmsMapQ, byte[] lk) {
|
SinglePointCall(int offset, long min_depth, int rmsMapQ, short[] lk, short minimumLikelihood) {
|
||||||
if (lk.length != LIKELYHOOD_SIZE) {
|
if (lk.length != LIKELYHOOD_SIZE) {
|
||||||
throw new IllegalArgumentException("SinglePointGenotype: passed in likelyhood array size != LIKELYHOOD_SIZE");
|
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
|
||||||
}
|
}
|
||||||
this.offset = offset;
|
this.offset = offset;
|
||||||
this.min_depth = min_depth;
|
this.min_depth = min_depth;
|
||||||
this.rmsMapQ = rmsMapQ;
|
this.rmsMapQ = rmsMapQ;
|
||||||
|
this.minimumLikelihood = minimumLikelihood;
|
||||||
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
|
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
|
||||||
byteSize = 9 + lk.length;
|
byteSize = 9 + lk.length;
|
||||||
}
|
}
|
||||||
|
|
@ -187,11 +236,13 @@ class SinglePointGenotype implements RecordType {
|
||||||
*
|
*
|
||||||
* @param in the binary codec to get data from
|
* @param in the binary codec to get data from
|
||||||
*/
|
*/
|
||||||
SinglePointGenotype(BinaryCodec in) {
|
SinglePointCall(BinaryCodec in) {
|
||||||
offset = in.readInt();
|
offset = in.readInt();
|
||||||
min_depth = in.readInt();
|
min_depth = Long.valueOf(in.readInt());
|
||||||
rmsMapQ = in.readByte();
|
rmsMapQ = in.readByte();
|
||||||
in.readBytes(lk);
|
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
|
||||||
|
lk[x] = in.readUByte();
|
||||||
|
}
|
||||||
byteSize = 9 + lk.length;
|
byteSize = 9 + lk.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -202,13 +253,20 @@ class SinglePointGenotype implements RecordType {
|
||||||
*/
|
*/
|
||||||
public void write(BinaryCodec out) {
|
public void write(BinaryCodec out) {
|
||||||
out.writeInt(offset);
|
out.writeInt(offset);
|
||||||
out.writeInt(min_depth);
|
out.writeInt(min_depth.intValue());
|
||||||
out.writeByte(rmsMapQ);
|
out.writeByte(rmsMapQ);
|
||||||
out.writeBytes(lk);
|
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
|
||||||
|
out.writeUByte(lk[x]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public RECORD_TYPE getRecordType() {
|
public RECORD_TYPE getRecordType() {
|
||||||
return RECORD_TYPE.SINGLE;
|
return RECORD_TYPE.SINGLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return the size of this record, in bytes */
|
||||||
|
public int getByteSize() {
|
||||||
|
return byteSize;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -84,14 +84,28 @@ public class LikelihoodObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return a byte array representation of the likelihood object, in GLFv3 specified order
|
* find the minimum likelihood value stored in the set
|
||||||
|
* @return the smallest stored likelihood value, or Integer.MAX_VALUE if no values are stored
|
||||||
|
*/
|
||||||
|
public int getMinimumValue() {
|
||||||
|
int minimum = Integer.MAX_VALUE;
|
||||||
|
for (int i : likelihood.values()) {
|
||||||
|
if (i < minimum) { minimum = i;}
|
||||||
|
}
|
||||||
|
return minimum;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return a byte array representation of the likelihood object, in GLFv3 specified order.
|
||||||
|
* The return type is short[] instead of byte[], since signed bytes only store -128 to 127,
|
||||||
|
* not the unsigned 0 to 255 range we need.
|
||||||
* @return a byte array of the genotype values
|
* @return a byte array of the genotype values
|
||||||
*/
|
*/
|
||||||
public int[] toByteArray() {
|
public short[] toByteArray() {
|
||||||
int ret[] = new int[GENOTYPE.values().length];
|
short ret[] = new short[GENOTYPE.values().length];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (GENOTYPE type : GENOTYPE.values()) {
|
for (GENOTYPE type : GENOTYPE.values()) {
|
||||||
ret[index] = likelihood.get(type);
|
ret[index] = (short)likelihood.get(type).intValue();
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,10 @@
|
||||||
package org.broadinstitute.sting.utils.glf;
|
package org.broadinstitute.sting.utils.glf;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import org.junit.Before;
|
||||||
|
import net.sf.samtools.util.BinaryCodec;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -37,9 +41,37 @@ import org.junit.Test;
|
||||||
*/
|
*/
|
||||||
public class GLFRecordTest {
|
public class GLFRecordTest {
|
||||||
|
|
||||||
|
/** some made up values that we use to generate the GLF */
|
||||||
|
private final String header = "header";
|
||||||
|
private final String referenceSequenceName = "refSeq";
|
||||||
|
private final int refLength = 1000;
|
||||||
|
|
||||||
|
private GLFRecord rec;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void before() {
|
||||||
|
rec = new GLFRecord(header, referenceSequenceName, refLength);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* make a fake snp
|
||||||
|
* @param genotype the genotype, given as an index into LikelihoodObject.GENOTYPE.values() (AA, AT, ... GG)
|
||||||
|
*/
|
||||||
|
private void addFakeSNP(int genotype, int location) {
|
||||||
|
LikelihoodObject obj = new LikelihoodObject();
|
||||||
|
obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype],0.5f);
|
||||||
|
rec.addSNPCall(location,10,10,obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void basicWrite() {
|
public void basicWrite() {
|
||||||
|
File writeTo = new File("testGLF.glf");
|
||||||
|
BinaryCodec codec = new BinaryCodec(writeTo, true);
|
||||||
|
for (int x = 0; x < 100; x++) {
|
||||||
|
addFakeSNP(0,x);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
int[] ret = mLO.toByteArray();
|
short[] ret = mLO.toByteArray();
|
||||||
for (index = 0; index < ret.length; index++) {
|
for (index = 0; index < ret.length; index++) {
|
||||||
assertTrue(ray[index] == ret[index]);
|
assertTrue(ray[index] == ret[index]);
|
||||||
}
|
}
|
||||||
|
|
@ -95,7 +95,7 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
int[] ret = mLO.toByteArray();
|
short[] ret = mLO.toByteArray();
|
||||||
for (index = 0; index < ret.length; index++) {
|
for (index = 0; index < ret.length; index++) {
|
||||||
assertTrue(ret[index] == 128);
|
assertTrue(ret[index] == 128);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue