added some more documentation to the GLF writer and it's supporting classes, and some other fixes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@875 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ae2eddec2d
commit
82aa0533b8
|
|
@ -184,7 +184,7 @@ abstract class GLFRecord {
|
||||||
*
|
*
|
||||||
* @param out the binary codec to write to
|
* @param out the binary codec to write to
|
||||||
*/
|
*/
|
||||||
public void write( BinaryCodec out ) {
|
void write( BinaryCodec out ) {
|
||||||
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
|
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
|
||||||
out.writeUInt(( (Long) offset ).intValue());
|
out.writeUInt(( (Long) offset ).intValue());
|
||||||
out.writeUInt((new Long(readDepth).intValue()));
|
out.writeUInt((new Long(readDepth).intValue()));
|
||||||
|
|
@ -203,6 +203,8 @@ abstract class GLFRecord {
|
||||||
*
|
*
|
||||||
* @return the size of this record type, in bytes
|
* @return the size of this record type, in bytes
|
||||||
*/
|
*/
|
||||||
public abstract int getByteSize();
|
public int getByteSize() {
|
||||||
|
return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,12 +28,20 @@ import java.io.DataOutputStream;
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* @version 1.0
|
* @version 1.0
|
||||||
|
*
|
||||||
|
* This class writes GLF files. You can either specify GLFRecords, or programaticly generate
|
||||||
|
* single and variable length genotype calls using the provided functions. When you've finished
|
||||||
|
* generating GLF records, make sure you close the file.
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
public class GLFWriter {
|
public class GLFWriter {
|
||||||
// our output codec
|
// our output codec
|
||||||
private final BinaryCodec outputBinaryCodec;
|
private final BinaryCodec outputBinaryCodec;
|
||||||
|
|
||||||
|
// the glf magic number, which identifies a properly formatted GLF file
|
||||||
public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
|
public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
|
||||||
|
|
||||||
|
// our header text, reference sequence name (i.e. chr1), and it's length
|
||||||
private String headerText = "";
|
private String headerText = "";
|
||||||
private String referenceSequenceName = "";
|
private String referenceSequenceName = "";
|
||||||
private long referenceSequenceLength = 0;
|
private long referenceSequenceLength = 0;
|
||||||
|
|
@ -42,7 +50,7 @@ public class GLFWriter {
|
||||||
* The public constructor for creating a GLF object
|
* The public constructor for creating a GLF object
|
||||||
*
|
*
|
||||||
* @param headerText the header text (currently unclear what the contents are)
|
* @param headerText the header text (currently unclear what the contents are)
|
||||||
* @param referenceSequenceName the reference sequence name
|
* @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc
|
||||||
*/
|
*/
|
||||||
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
|
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
|
||||||
this.headerText = headerText;
|
this.headerText = headerText;
|
||||||
|
|
@ -71,7 +79,6 @@ public class GLFWriter {
|
||||||
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
|
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
|
||||||
readDepth,
|
readDepth,
|
||||||
rmsMapQ,
|
rmsMapQ,
|
||||||
lhValues.toByteArray(),
|
|
||||||
lhValues);
|
lhValues);
|
||||||
call.write(this.outputBinaryCodec);
|
call.write(this.outputBinaryCodec);
|
||||||
}
|
}
|
||||||
|
|
@ -84,9 +91,9 @@ public class GLFWriter {
|
||||||
* @param readDepth the read depth at the specified postion
|
* @param readDepth the read depth at the specified postion
|
||||||
* @param rmsMapQ the root mean square of the mapping quality
|
* @param rmsMapQ the root mean square of the mapping quality
|
||||||
* @param minimumLikelihood the minimum likelihood value
|
* @param minimumLikelihood the minimum likelihood value
|
||||||
* @param homozygProb1 the probability of the first homozygous indel allele
|
* @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
|
||||||
* @param homozygProb2 the probability of the second homozygous indel allele
|
* @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
|
||||||
* @param heterozygProb the probability of the heterozygote
|
* @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255
|
||||||
* @param indelLength1 the length of the first indel allele
|
* @param indelLength1 the length of the first indel allele
|
||||||
* @param indelLength2 the length of the second indel allele
|
* @param indelLength2 the length of the second indel allele
|
||||||
* @param indelSeq1 the sequence for the first indel allele
|
* @param indelSeq1 the sequence for the first indel allele
|
||||||
|
|
@ -140,7 +147,10 @@ public class GLFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Write out the header information for the GLF file
|
* Write out the header information for the GLF file. The header contains
|
||||||
|
* the magic number, the length of the header text, the text itself, the reference
|
||||||
|
* sequence (null terminated) preceeded by it's length, and the the genomic
|
||||||
|
* length of the reference sequence.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private void writeHeader() {
|
private void writeHeader() {
|
||||||
|
|
@ -157,7 +167,8 @@ public class GLFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* close the file
|
* close the file. You must close the file to ensure any remaining data gets
|
||||||
|
* written out.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public void close() {
|
public void close() {
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,9 @@ public class LikelihoodObject {
|
||||||
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
|
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// how many genotypes we're storing
|
||||||
|
public static final int genoTypeCount = GENOTYPE.values().length;
|
||||||
|
|
||||||
// the associated negitive log likelihoods for each genotype
|
// the associated negitive log likelihoods for each genotype
|
||||||
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
|
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
|
||||||
|
|
||||||
|
|
@ -57,7 +60,7 @@ public class LikelihoodObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
// since the default case it to start with all infinity, we can choose any random base
|
// since the default case it to start with all infinity, we can choose any random base
|
||||||
GENOTYPE greatest = GENOTYPE.AA;
|
protected GENOTYPE greatest = GENOTYPE.AA;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -69,11 +72,16 @@ public class LikelihoodObject {
|
||||||
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
|
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
|
||||||
}
|
}
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
int lowestScore = 256;
|
||||||
for (GENOTYPE type : GENOTYPE.values()) {
|
for (GENOTYPE type : GENOTYPE.values()) {
|
||||||
if (values[index] < 0 || values[index] > 255) {
|
if (values[index] < 0 || values[index] > 255) {
|
||||||
throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
|
throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
|
||||||
}
|
}
|
||||||
likelihood.put(type, values[index]);
|
likelihood.put(type, values[index]);
|
||||||
|
if (values[index] < lowestScore) {
|
||||||
|
lowestScore = values[index];
|
||||||
|
greatest = type;
|
||||||
|
}
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,37 +29,33 @@ import net.sf.samtools.util.BinaryCodec;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* @author aaron
|
||||||
* @author aaron
|
* <p/>
|
||||||
*
|
* Class SinglePointCall
|
||||||
* Class SinglePointCall
|
* <p/>
|
||||||
*
|
* This class represents a single point geneotype call in GLF vernacular
|
||||||
* This class represents a single point geneotype call in GLF vernacular
|
*/
|
||||||
**/
|
|
||||||
class SinglePointCall extends GLFRecord {
|
class SinglePointCall extends GLFRecord {
|
||||||
|
|
||||||
// our likelyhood array size
|
|
||||||
public static final int LIKELYHOOD_SIZE = 10;
|
|
||||||
|
|
||||||
// our record type
|
// our record type
|
||||||
private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
|
private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
|
||||||
|
|
||||||
// our array of likelihoods
|
// our likelihood object
|
||||||
public final short lk[] = new short[LIKELYHOOD_SIZE];
|
private LikelihoodObject likelihoods;
|
||||||
|
|
||||||
// our size, we're immutable (the size at least). In bytes.
|
/**
|
||||||
private final int byteSize;
|
* create a single
|
||||||
|
*
|
||||||
|
* @param refBase the reference base, as a char
|
||||||
|
* @param offset the location, as an offset from the previous glf record
|
||||||
|
* @param readDepth the read depth at the specified postion
|
||||||
|
* @param rmsMapQ the root mean square of the mapping quality
|
||||||
|
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
|
||||||
|
*/
|
||||||
|
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) {
|
||||||
|
super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ);
|
||||||
|
|
||||||
|
likelihoods = lhValues;
|
||||||
SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) {
|
|
||||||
super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ);
|
|
||||||
|
|
||||||
if (lk.length != LIKELYHOOD_SIZE) {
|
|
||||||
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
|
|
||||||
}
|
|
||||||
|
|
||||||
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
|
|
||||||
byteSize = 9 + lk.length;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -68,15 +64,17 @@ class SinglePointCall extends GLFRecord {
|
||||||
*
|
*
|
||||||
* @param out
|
* @param out
|
||||||
*/
|
*/
|
||||||
public void write( BinaryCodec out ) {
|
void write( BinaryCodec out ) {
|
||||||
super.write(out);
|
super.write(out);
|
||||||
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
|
short array[] = likelihoods.toByteArray();
|
||||||
out.writeUByte(lk[x]);
|
for (int x = 0; x < array.length; x++) {
|
||||||
|
out.writeUByte(array[x]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return the record type we represent, in this case SINGLE
|
* return the record type we represent, in this case SINGLE
|
||||||
|
*
|
||||||
* @return RECORD_TYPE.SINGLE
|
* @return RECORD_TYPE.SINGLE
|
||||||
*/
|
*/
|
||||||
public RECORD_TYPE getRecordType() {
|
public RECORD_TYPE getRecordType() {
|
||||||
|
|
@ -85,10 +83,11 @@ class SinglePointCall extends GLFRecord {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return our size in bytes
|
* return our size in bytes
|
||||||
|
*
|
||||||
* @return number of bytes we represent
|
* @return number of bytes we represent
|
||||||
*/
|
*/
|
||||||
public int getByteSize() {
|
public int getByteSize() {
|
||||||
return byteSize;
|
return likelihoods.genoTypeCount + super.getByteSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,12 @@ import net.sf.samtools.util.BinaryCodec;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class VariableLengthCall
|
* Class VariableLengthCall
|
||||||
* <p/>
|
* <p/>
|
||||||
* This class represents variable length genotype calls in the GLF format.
|
* This class represents variable length genotype calls in the GLF format.
|
||||||
* Currently a lot of parameters need to be provided, but we may be able to thin
|
* Currently a lot of parameters need to be provided, but we may be able to thin
|
||||||
* those down as we understand what we have to specify and what we can infer.
|
* those down as we understand what we have to specify and what we can infer.
|
||||||
*/
|
*/
|
||||||
class VariableLengthCall extends GLFRecord {
|
class VariableLengthCall extends GLFRecord {
|
||||||
// our fields, corresponding to the glf spec
|
// our fields, corresponding to the glf spec
|
||||||
|
|
@ -51,7 +51,22 @@ class VariableLengthCall extends GLFRecord {
|
||||||
private final int size;
|
private final int size;
|
||||||
|
|
||||||
|
|
||||||
/** the default constructor */
|
/**
|
||||||
|
* the default constructor
|
||||||
|
*
|
||||||
|
* @param refBase the reference base
|
||||||
|
* @param offset the location, as an offset from the previous glf record
|
||||||
|
* @param readDepth the read depth at the specified postion
|
||||||
|
* @param rmsMapQ the root mean square of the mapping quality
|
||||||
|
* @param minimumLikelihood the minimum likelihood value
|
||||||
|
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
|
||||||
|
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
|
||||||
|
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
|
||||||
|
* @param indelLen1 the length of the first indel allele
|
||||||
|
* @param indelLen2 the length of the second indel allele
|
||||||
|
* @param indelSeq1 the sequence for the first indel allele
|
||||||
|
* @param indelSeq2 the sequence for the second indel allele
|
||||||
|
*/
|
||||||
VariableLengthCall( char refBase,
|
VariableLengthCall( char refBase,
|
||||||
long offset,
|
long offset,
|
||||||
int readDepth,
|
int readDepth,
|
||||||
|
|
@ -81,7 +96,7 @@ class VariableLengthCall extends GLFRecord {
|
||||||
*
|
*
|
||||||
* @param out the binary codec to write to
|
* @param out the binary codec to write to
|
||||||
*/
|
*/
|
||||||
public void write( BinaryCodec out ) {
|
void write( BinaryCodec out ) {
|
||||||
super.write(out);
|
super.write(out);
|
||||||
out.writeByte(lkHom1);
|
out.writeByte(lkHom1);
|
||||||
out.writeByte(lkHom2);
|
out.writeByte(lkHom2);
|
||||||
|
|
@ -101,8 +116,8 @@ class VariableLengthCall extends GLFRecord {
|
||||||
return RECORD_TYPE.VARIABLE;
|
return RECORD_TYPE.VARIABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return */
|
/** @return the size of the record, which is the size of our fields plus the generic records fields */
|
||||||
public int getByteSize() {
|
public int getByteSize() {
|
||||||
return size;
|
return size + super.getByteSize();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,34 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDefaultArrayValues() {
|
||||||
|
mLO = new LikelihoodObject();
|
||||||
|
short[] ret = mLO.toByteArray();
|
||||||
|
for (int index = 0; index < ret.length; index++) {
|
||||||
|
assertTrue(ret[index] == 255);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetMinimum() {
|
||||||
|
int[] ray = new int[10];
|
||||||
|
for (int x = 0; x < 10; x++) {
|
||||||
|
ray[x] = ( 240 );
|
||||||
|
}
|
||||||
|
ray [5] = 0;
|
||||||
|
mLO = new LikelihoodObject(ray);
|
||||||
|
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
|
short smallest = (short)mLO.getMinimumValue();
|
||||||
|
assertTrue(smallest == 0);
|
||||||
|
int index = 0;
|
||||||
|
short[] ret = mLO.toByteArray();
|
||||||
|
for (index = 0; index < ret.length; index++) {
|
||||||
|
assertTrue(smallest <= ret[index]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSetLikelihood() {
|
public void testSetLikelihood() {
|
||||||
mLO = new LikelihoodObject();
|
mLO = new LikelihoodObject();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue