added some more documentation to the GLF writer and it's supporting classes, and some other fixes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@875 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-02 14:53:58 +00:00
parent ae2eddec2d
commit 82aa0533b8
6 changed files with 111 additions and 48 deletions

View File

@ -184,7 +184,7 @@ abstract class GLFRecord {
* *
* @param out the binary codec to write to * @param out the binary codec to write to
*/ */
public void write( BinaryCodec out ) { void write( BinaryCodec out ) {
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) )); out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
out.writeUInt(( (Long) offset ).intValue()); out.writeUInt(( (Long) offset ).intValue());
out.writeUInt((new Long(readDepth).intValue())); out.writeUInt((new Long(readDepth).intValue()));
@ -203,6 +203,8 @@ abstract class GLFRecord {
* *
* @return the size of this record type, in bytes * @return the size of this record type, in bytes
*/ */
public abstract int getByteSize(); public int getByteSize() {
return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1)
}
} }

View File

@ -28,12 +28,20 @@ import java.io.DataOutputStream;
/** /**
* @author aaron * @author aaron
* @version 1.0 * @version 1.0
*
* This class writes GLF files. You can either specify GLFRecords, or programaticly generate
* single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file.
*
*/ */
public class GLFWriter { public class GLFWriter {
// our output codec // our output codec
private final BinaryCodec outputBinaryCodec; private final BinaryCodec outputBinaryCodec;
// the glf magic number, which identifies a properly formatted GLF file
public static final short[] glfMagic = {'G', 'L', 'F', '\3'}; public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
// our header text, reference sequence name (i.e. chr1), and it's length
private String headerText = ""; private String headerText = "";
private String referenceSequenceName = ""; private String referenceSequenceName = "";
private long referenceSequenceLength = 0; private long referenceSequenceLength = 0;
@ -42,7 +50,7 @@ public class GLFWriter {
* The public constructor for creating a GLF object * The public constructor for creating a GLF object
* *
* @param headerText the header text (currently unclear what the contents are) * @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name * @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc
*/ */
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) { public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
this.headerText = headerText; this.headerText = headerText;
@ -71,7 +79,6 @@ public class GLFWriter {
SinglePointCall call = new SinglePointCall(refBase, genomicLoc, SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
readDepth, readDepth,
rmsMapQ, rmsMapQ,
lhValues.toByteArray(),
lhValues); lhValues);
call.write(this.outputBinaryCodec); call.write(this.outputBinaryCodec);
} }
@ -84,9 +91,9 @@ public class GLFWriter {
* @param readDepth the read depth at the specified postion * @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
* @param minimumLikelihood the minimum likelihood value * @param minimumLikelihood the minimum likelihood value
* @param homozygProb1 the probability of the first homozygous indel allele * @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param homozygProb2 the probability of the second homozygous indel allele * @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param heterozygProb the probability of the heterozygote * @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelLength1 the length of the first indel allele * @param indelLength1 the length of the first indel allele
* @param indelLength2 the length of the second indel allele * @param indelLength2 the length of the second indel allele
* @param indelSeq1 the sequence for the first indel allele * @param indelSeq1 the sequence for the first indel allele
@ -140,7 +147,10 @@ public class GLFWriter {
} }
/** /**
* Write out the header information for the GLF file * Write out the header information for the GLF file. The header contains
* the magic number, the length of the header text, the text itself, the reference
* sequence (null terminated) preceeded by it's length, and the the genomic
* length of the reference sequence.
* *
*/ */
private void writeHeader() { private void writeHeader() {
@ -157,7 +167,8 @@ public class GLFWriter {
} }
/** /**
* close the file * close the file. You must close the file to ensure any remaining data gets
* written out.
* *
*/ */
public void close() { public void close() {

View File

@ -46,6 +46,9 @@ public class LikelihoodObject {
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
} }
// how many genotypes we're storing
public static final int genoTypeCount = GENOTYPE.values().length;
// the associated negitive log likelihoods for each genotype // the associated negitive log likelihoods for each genotype
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>(); final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
@ -57,7 +60,7 @@ public class LikelihoodObject {
} }
// since the default case it to start with all infinity, we can choose any random base // since the default case it to start with all infinity, we can choose any random base
GENOTYPE greatest = GENOTYPE.AA; protected GENOTYPE greatest = GENOTYPE.AA;
/** /**
@ -69,11 +72,16 @@ public class LikelihoodObject {
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length); throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
} }
int index = 0; int index = 0;
int lowestScore = 256;
for (GENOTYPE type : GENOTYPE.values()) { for (GENOTYPE type : GENOTYPE.values()) {
if (values[index] < 0 || values[index] > 255) { if (values[index] < 0 || values[index] > 255) {
throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]); throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
} }
likelihood.put(type, values[index]); likelihood.put(type, values[index]);
if (values[index] < lowestScore) {
lowestScore = values[index];
greatest = type;
}
++index; ++index;
} }
} }

View File

@ -29,37 +29,33 @@ import net.sf.samtools.util.BinaryCodec;
*/ */
/** /**
* * @author aaron
* @author aaron * <p/>
* * Class SinglePointCall
* Class SinglePointCall * <p/>
* * This class represents a single point geneotype call in GLF vernacular
* This class represents a single point geneotype call in GLF vernacular */
**/
class SinglePointCall extends GLFRecord { class SinglePointCall extends GLFRecord {
// our likelyhood array size
public static final int LIKELYHOOD_SIZE = 10;
// our record type // our record type
private final RECORD_TYPE type = RECORD_TYPE.SINGLE; private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
// our array of likelihoods // our likelihood object
public final short lk[] = new short[LIKELYHOOD_SIZE]; private LikelihoodObject likelihoods;
// our size, we're immutable (the size at least). In bytes. /**
private final int byteSize; * create a single
*
* @param refBase the reference base, as a char
* @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
*/
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) {
super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ);
likelihoods = lhValues;
SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) {
super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ);
if (lk.length != LIKELYHOOD_SIZE) {
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
}
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
byteSize = 9 + lk.length;
} }
@ -68,15 +64,17 @@ class SinglePointCall extends GLFRecord {
* *
* @param out * @param out
*/ */
public void write( BinaryCodec out ) { void write( BinaryCodec out ) {
super.write(out); super.write(out);
for (int x = 0; x < LIKELYHOOD_SIZE; x++) { short array[] = likelihoods.toByteArray();
out.writeUByte(lk[x]); for (int x = 0; x < array.length; x++) {
out.writeUByte(array[x]);
} }
} }
/** /**
* return the record type we represent, in this case SINGLE * return the record type we represent, in this case SINGLE
*
* @return RECORD_TYPE.SINGLE * @return RECORD_TYPE.SINGLE
*/ */
public RECORD_TYPE getRecordType() { public RECORD_TYPE getRecordType() {
@ -85,10 +83,11 @@ class SinglePointCall extends GLFRecord {
/** /**
* return our size in bytes * return our size in bytes
*
* @return number of bytes we represent * @return number of bytes we represent
*/ */
public int getByteSize() { public int getByteSize() {
return byteSize; return likelihoods.genoTypeCount + super.getByteSize();
} }
} }

View File

@ -30,12 +30,12 @@ import net.sf.samtools.util.BinaryCodec;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
* Class VariableLengthCall * Class VariableLengthCall
* <p/> * <p/>
* This class represents variable length genotype calls in the GLF format. * This class represents variable length genotype calls in the GLF format.
* Currently a lot of parameters need to be provided, but we may be able to thin * Currently a lot of parameters need to be provided, but we may be able to thin
* those down as we understand what we have to specify and what we can infer. * those down as we understand what we have to specify and what we can infer.
*/ */
class VariableLengthCall extends GLFRecord { class VariableLengthCall extends GLFRecord {
// our fields, corresponding to the glf spec // our fields, corresponding to the glf spec
@ -51,7 +51,22 @@ class VariableLengthCall extends GLFRecord {
private final int size; private final int size;
/** the default constructor */ /**
* the default constructor
*
* @param refBase the reference base
* @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param minimumLikelihood the minimum likelihood value
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelLen1 the length of the first indel allele
* @param indelLen2 the length of the second indel allele
* @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele
*/
VariableLengthCall( char refBase, VariableLengthCall( char refBase,
long offset, long offset,
int readDepth, int readDepth,
@ -81,7 +96,7 @@ class VariableLengthCall extends GLFRecord {
* *
* @param out the binary codec to write to * @param out the binary codec to write to
*/ */
public void write( BinaryCodec out ) { void write( BinaryCodec out ) {
super.write(out); super.write(out);
out.writeByte(lkHom1); out.writeByte(lkHom1);
out.writeByte(lkHom2); out.writeByte(lkHom2);
@ -101,8 +116,8 @@ class VariableLengthCall extends GLFRecord {
return RECORD_TYPE.VARIABLE; return RECORD_TYPE.VARIABLE;
} }
/** @return */ /** @return the size of the record, which is the size of our fields plus the generic records fields */
public int getByteSize() { public int getByteSize() {
return size; return size + super.getByteSize();
} }
} }

View File

@ -86,6 +86,34 @@ public class LikelihoodObjectTest extends BaseTest {
} }
} }
@Test
public void testDefaultArrayValues() {
mLO = new LikelihoodObject();
short[] ret = mLO.toByteArray();
for (int index = 0; index < ret.length; index++) {
assertTrue(ret[index] == 255);
}
}
@Test
public void testGetMinimum() {
int[] ray = new int[10];
for (int x = 0; x < 10; x++) {
ray[x] = ( 240 );
}
ray [5] = 0;
mLO = new LikelihoodObject(ray);
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
short smallest = (short)mLO.getMinimumValue();
assertTrue(smallest == 0);
int index = 0;
short[] ret = mLO.toByteArray();
for (index = 0; index < ret.length; index++) {
assertTrue(smallest <= ret[index]);
}
}
@Test @Test
public void testSetLikelihood() { public void testSetLikelihood() {
mLO = new LikelihoodObject(); mLO = new LikelihoodObject();