added some more documentation to the GLF writer and it's supporting classes, and some other fixes

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@875 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-02 14:53:58 +00:00
parent ae2eddec2d
commit 82aa0533b8
6 changed files with 111 additions and 48 deletions

View File

@ -184,7 +184,7 @@ abstract class GLFRecord {
*
* @param out the binary codec to write to
*/
public void write( BinaryCodec out ) {
void write( BinaryCodec out ) {
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
out.writeUInt(( (Long) offset ).intValue());
out.writeUInt((new Long(readDepth).intValue()));
@ -203,6 +203,8 @@ abstract class GLFRecord {
*
* @return the size of this record type, in bytes
*/
public abstract int getByteSize();
public int getByteSize() {
return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1)
}
}

View File

@ -28,12 +28,20 @@ import java.io.DataOutputStream;
/**
* @author aaron
* @version 1.0
*
* This class writes GLF files. You can either specify GLFRecords, or programaticly generate
* single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file.
*
*/
public class GLFWriter {
// our output codec
private final BinaryCodec outputBinaryCodec;
// the glf magic number, which identifies a properly formatted GLF file
public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
// our header text, reference sequence name (i.e. chr1), and it's length
private String headerText = "";
private String referenceSequenceName = "";
private long referenceSequenceLength = 0;
@ -42,7 +50,7 @@ public class GLFWriter {
* The public constructor for creating a GLF object
*
* @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name
* @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc
*/
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
this.headerText = headerText;
@ -71,7 +79,6 @@ public class GLFWriter {
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
readDepth,
rmsMapQ,
lhValues.toByteArray(),
lhValues);
call.write(this.outputBinaryCodec);
}
@ -84,9 +91,9 @@ public class GLFWriter {
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param minimumLikelihood the minimum likelihood value
* @param homozygProb1 the probability of the first homozygous indel allele
* @param homozygProb2 the probability of the second homozygous indel allele
* @param heterozygProb the probability of the heterozygote
* @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelLength1 the length of the first indel allele
* @param indelLength2 the length of the second indel allele
* @param indelSeq1 the sequence for the first indel allele
@ -140,7 +147,10 @@ public class GLFWriter {
}
/**
* Write out the header information for the GLF file
* Write out the header information for the GLF file. The header contains
* the magic number, the length of the header text, the text itself, the reference
* sequence (null terminated) preceeded by it's length, and the the genomic
* length of the reference sequence.
*
*/
private void writeHeader() {
@ -157,7 +167,8 @@ public class GLFWriter {
}
/**
* close the file
* close the file. You must close the file to ensure any remaining data gets
* written out.
*
*/
public void close() {

View File

@ -46,6 +46,9 @@ public class LikelihoodObject {
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
}
// how many genotypes we're storing
public static final int genoTypeCount = GENOTYPE.values().length;
// the associated negitive log likelihoods for each genotype
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
@ -57,7 +60,7 @@ public class LikelihoodObject {
}
// since the default case it to start with all infinity, we can choose any random base
GENOTYPE greatest = GENOTYPE.AA;
protected GENOTYPE greatest = GENOTYPE.AA;
/**
@ -69,11 +72,16 @@ public class LikelihoodObject {
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
}
int index = 0;
int lowestScore = 256;
for (GENOTYPE type : GENOTYPE.values()) {
if (values[index] < 0 || values[index] > 255) {
throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
}
likelihood.put(type, values[index]);
if (values[index] < lowestScore) {
lowestScore = values[index];
greatest = type;
}
++index;
}
}

View File

@ -29,37 +29,33 @@ import net.sf.samtools.util.BinaryCodec;
*/
/**
*
* @author aaron
*
* Class SinglePointCall
*
* This class represents a single point geneotype call in GLF vernacular
**/
* @author aaron
* <p/>
* Class SinglePointCall
* <p/>
* This class represents a single point geneotype call in GLF vernacular
*/
class SinglePointCall extends GLFRecord {
// our likelyhood array size
public static final int LIKELYHOOD_SIZE = 10;
// our record type
private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
// our array of likelihoods
public final short lk[] = new short[LIKELYHOOD_SIZE];
// our likelihood object
private LikelihoodObject likelihoods;
// our size, we're immutable (the size at least). In bytes.
private final int byteSize;
/**
* create a single
*
* @param refBase the reference base, as a char
* @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
*/
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) {
super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ);
SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) {
super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ);
if (lk.length != LIKELYHOOD_SIZE) {
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
}
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
byteSize = 9 + lk.length;
likelihoods = lhValues;
}
@ -68,15 +64,17 @@ class SinglePointCall extends GLFRecord {
*
* @param out
*/
public void write( BinaryCodec out ) {
void write( BinaryCodec out ) {
super.write(out);
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
out.writeUByte(lk[x]);
short array[] = likelihoods.toByteArray();
for (int x = 0; x < array.length; x++) {
out.writeUByte(array[x]);
}
}
/**
* return the record type we represent, in this case SINGLE
*
* @return RECORD_TYPE.SINGLE
*/
public RECORD_TYPE getRecordType() {
@ -85,10 +83,11 @@ class SinglePointCall extends GLFRecord {
/**
* return our size in bytes
*
* @return number of bytes we represent
*/
public int getByteSize() {
return byteSize;
return likelihoods.genoTypeCount + super.getByteSize();
}
}

View File

@ -30,12 +30,12 @@ import net.sf.samtools.util.BinaryCodec;
/**
* @author aaron
* <p/>
* Class VariableLengthCall
* <p/>
* This class represents variable length genotype calls in the GLF format.
* Currently a lot of parameters need to be provided, but we may be able to thin
* those down as we understand what we have to specify and what we can infer.
* <p/>
* Class VariableLengthCall
* <p/>
* This class represents variable length genotype calls in the GLF format.
* Currently a lot of parameters need to be provided, but we may be able to thin
* those down as we understand what we have to specify and what we can infer.
*/
class VariableLengthCall extends GLFRecord {
// our fields, corresponding to the glf spec
@ -51,7 +51,22 @@ class VariableLengthCall extends GLFRecord {
private final int size;
/** the default constructor */
/**
* the default constructor
*
* @param refBase the reference base
* @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param minimumLikelihood the minimum likelihood value
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelLen1 the length of the first indel allele
* @param indelLen2 the length of the second indel allele
* @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele
*/
VariableLengthCall( char refBase,
long offset,
int readDepth,
@ -81,7 +96,7 @@ class VariableLengthCall extends GLFRecord {
*
* @param out the binary codec to write to
*/
public void write( BinaryCodec out ) {
void write( BinaryCodec out ) {
super.write(out);
out.writeByte(lkHom1);
out.writeByte(lkHom2);
@ -101,8 +116,8 @@ class VariableLengthCall extends GLFRecord {
return RECORD_TYPE.VARIABLE;
}
/** @return */
/** @return the size of the record, which is the size of our fields plus the generic records fields */
public int getByteSize() {
return size;
return size + super.getByteSize();
}
}

View File

@ -86,6 +86,34 @@ public class LikelihoodObjectTest extends BaseTest {
}
}
@Test
public void testDefaultArrayValues() {
mLO = new LikelihoodObject();
short[] ret = mLO.toByteArray();
for (int index = 0; index < ret.length; index++) {
assertTrue(ret[index] == 255);
}
}
@Test
public void testGetMinimum() {
int[] ray = new int[10];
for (int x = 0; x < 10; x++) {
ray[x] = ( 240 );
}
ray [5] = 0;
mLO = new LikelihoodObject(ray);
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
short smallest = (short)mLO.getMinimumValue();
assertTrue(smallest == 0);
int index = 0;
short[] ret = mLO.toByteArray();
for (index = 0; index < ret.length; index++) {
assertTrue(smallest <= ret[index]);
}
}
@Test
public void testSetLikelihood() {
mLO = new LikelihoodObject();