added some more documentation to the GLF writer and it's supporting classes, and some other fixes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@875 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ae2eddec2d
commit
82aa0533b8
|
|
@ -184,7 +184,7 @@ abstract class GLFRecord {
|
|||
*
|
||||
* @param out the binary codec to write to
|
||||
*/
|
||||
public void write( BinaryCodec out ) {
|
||||
void write( BinaryCodec out ) {
|
||||
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
|
||||
out.writeUInt(( (Long) offset ).intValue());
|
||||
out.writeUInt((new Long(readDepth).intValue()));
|
||||
|
|
@ -203,6 +203,8 @@ abstract class GLFRecord {
|
|||
*
|
||||
* @return the size of this record type, in bytes
|
||||
*/
|
||||
public abstract int getByteSize();
|
||||
public int getByteSize() {
|
||||
return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,12 +28,20 @@ import java.io.DataOutputStream;
|
|||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
*
|
||||
* This class writes GLF files. You can either specify GLFRecords, or programaticly generate
|
||||
* single and variable length genotype calls using the provided functions. When you've finished
|
||||
* generating GLF records, make sure you close the file.
|
||||
*
|
||||
*/
|
||||
public class GLFWriter {
|
||||
// our output codec
|
||||
private final BinaryCodec outputBinaryCodec;
|
||||
|
||||
// the glf magic number, which identifies a properly formatted GLF file
|
||||
public static final short[] glfMagic = {'G', 'L', 'F', '\3'};
|
||||
|
||||
// our header text, reference sequence name (i.e. chr1), and it's length
|
||||
private String headerText = "";
|
||||
private String referenceSequenceName = "";
|
||||
private long referenceSequenceLength = 0;
|
||||
|
|
@ -42,7 +50,7 @@ public class GLFWriter {
|
|||
* The public constructor for creating a GLF object
|
||||
*
|
||||
* @param headerText the header text (currently unclear what the contents are)
|
||||
* @param referenceSequenceName the reference sequence name
|
||||
* @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc
|
||||
*/
|
||||
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
|
||||
this.headerText = headerText;
|
||||
|
|
@ -71,7 +79,6 @@ public class GLFWriter {
|
|||
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
|
||||
readDepth,
|
||||
rmsMapQ,
|
||||
lhValues.toByteArray(),
|
||||
lhValues);
|
||||
call.write(this.outputBinaryCodec);
|
||||
}
|
||||
|
|
@ -84,9 +91,9 @@ public class GLFWriter {
|
|||
* @param readDepth the read depth at the specified postion
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param minimumLikelihood the minimum likelihood value
|
||||
* @param homozygProb1 the probability of the first homozygous indel allele
|
||||
* @param homozygProb2 the probability of the second homozygous indel allele
|
||||
* @param heterozygProb the probability of the heterozygote
|
||||
* @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
|
||||
* @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
|
||||
* @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255
|
||||
* @param indelLength1 the length of the first indel allele
|
||||
* @param indelLength2 the length of the second indel allele
|
||||
* @param indelSeq1 the sequence for the first indel allele
|
||||
|
|
@ -140,7 +147,10 @@ public class GLFWriter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Write out the header information for the GLF file
|
||||
* Write out the header information for the GLF file. The header contains
|
||||
* the magic number, the length of the header text, the text itself, the reference
|
||||
* sequence (null terminated) preceeded by it's length, and the the genomic
|
||||
* length of the reference sequence.
|
||||
*
|
||||
*/
|
||||
private void writeHeader() {
|
||||
|
|
@ -157,7 +167,8 @@ public class GLFWriter {
|
|||
}
|
||||
|
||||
/**
|
||||
* close the file
|
||||
* close the file. You must close the file to ensure any remaining data gets
|
||||
* written out.
|
||||
*
|
||||
*/
|
||||
public void close() {
|
||||
|
|
|
|||
|
|
@ -46,6 +46,9 @@ public class LikelihoodObject {
|
|||
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
|
||||
}
|
||||
|
||||
// how many genotypes we're storing
|
||||
public static final int genoTypeCount = GENOTYPE.values().length;
|
||||
|
||||
// the associated negitive log likelihoods for each genotype
|
||||
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
|
||||
|
||||
|
|
@ -57,7 +60,7 @@ public class LikelihoodObject {
|
|||
}
|
||||
|
||||
// since the default case it to start with all infinity, we can choose any random base
|
||||
GENOTYPE greatest = GENOTYPE.AA;
|
||||
protected GENOTYPE greatest = GENOTYPE.AA;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -69,11 +72,16 @@ public class LikelihoodObject {
|
|||
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
|
||||
}
|
||||
int index = 0;
|
||||
int lowestScore = 256;
|
||||
for (GENOTYPE type : GENOTYPE.values()) {
|
||||
if (values[index] < 0 || values[index] > 255) {
|
||||
throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
|
||||
}
|
||||
likelihood.put(type, values[index]);
|
||||
if (values[index] < lowestScore) {
|
||||
lowestScore = values[index];
|
||||
greatest = type;
|
||||
}
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,37 +29,33 @@ import net.sf.samtools.util.BinaryCodec;
|
|||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class SinglePointCall
|
||||
*
|
||||
* This class represents a single point geneotype call in GLF vernacular
|
||||
**/
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class SinglePointCall
|
||||
* <p/>
|
||||
* This class represents a single point geneotype call in GLF vernacular
|
||||
*/
|
||||
class SinglePointCall extends GLFRecord {
|
||||
|
||||
// our likelyhood array size
|
||||
public static final int LIKELYHOOD_SIZE = 10;
|
||||
|
||||
// our record type
|
||||
private final RECORD_TYPE type = RECORD_TYPE.SINGLE;
|
||||
|
||||
// our array of likelihoods
|
||||
public final short lk[] = new short[LIKELYHOOD_SIZE];
|
||||
// our likelihood object
|
||||
private LikelihoodObject likelihoods;
|
||||
|
||||
// our size, we're immutable (the size at least). In bytes.
|
||||
private final int byteSize;
|
||||
/**
|
||||
* create a single
|
||||
*
|
||||
* @param refBase the reference base, as a char
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
* @param readDepth the read depth at the specified postion
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
|
||||
*/
|
||||
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) {
|
||||
super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ);
|
||||
|
||||
|
||||
SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, short[] lk, LikelihoodObject minimumLikelihood ) {
|
||||
super(refBase,offset,(short)minimumLikelihood.getMinimumValue(),readDepth,rmsMapQ);
|
||||
|
||||
if (lk.length != LIKELYHOOD_SIZE) {
|
||||
throw new IllegalArgumentException("SinglePointCall: passed in likelyhood array size != LIKELYHOOD_SIZE");
|
||||
}
|
||||
|
||||
System.arraycopy(lk, 0, this.lk, 0, LIKELYHOOD_SIZE);
|
||||
byteSize = 9 + lk.length;
|
||||
likelihoods = lhValues;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -68,15 +64,17 @@ class SinglePointCall extends GLFRecord {
|
|||
*
|
||||
* @param out
|
||||
*/
|
||||
public void write( BinaryCodec out ) {
|
||||
void write( BinaryCodec out ) {
|
||||
super.write(out);
|
||||
for (int x = 0; x < LIKELYHOOD_SIZE; x++) {
|
||||
out.writeUByte(lk[x]);
|
||||
short array[] = likelihoods.toByteArray();
|
||||
for (int x = 0; x < array.length; x++) {
|
||||
out.writeUByte(array[x]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* return the record type we represent, in this case SINGLE
|
||||
*
|
||||
* @return RECORD_TYPE.SINGLE
|
||||
*/
|
||||
public RECORD_TYPE getRecordType() {
|
||||
|
|
@ -85,10 +83,11 @@ class SinglePointCall extends GLFRecord {
|
|||
|
||||
/**
|
||||
* return our size in bytes
|
||||
*
|
||||
* @return number of bytes we represent
|
||||
*/
|
||||
public int getByteSize() {
|
||||
return byteSize;
|
||||
return likelihoods.genoTypeCount + super.getByteSize();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,12 +30,12 @@ import net.sf.samtools.util.BinaryCodec;
|
|||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VariableLengthCall
|
||||
* <p/>
|
||||
* This class represents variable length genotype calls in the GLF format.
|
||||
* Currently a lot of parameters need to be provided, but we may be able to thin
|
||||
* those down as we understand what we have to specify and what we can infer.
|
||||
* <p/>
|
||||
* Class VariableLengthCall
|
||||
* <p/>
|
||||
* This class represents variable length genotype calls in the GLF format.
|
||||
* Currently a lot of parameters need to be provided, but we may be able to thin
|
||||
* those down as we understand what we have to specify and what we can infer.
|
||||
*/
|
||||
class VariableLengthCall extends GLFRecord {
|
||||
// our fields, corresponding to the glf spec
|
||||
|
|
@ -51,7 +51,22 @@ class VariableLengthCall extends GLFRecord {
|
|||
private final int size;
|
||||
|
||||
|
||||
/** the default constructor */
|
||||
/**
|
||||
* the default constructor
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
* @param readDepth the read depth at the specified postion
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param minimumLikelihood the minimum likelihood value
|
||||
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
|
||||
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
|
||||
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
|
||||
* @param indelLen1 the length of the first indel allele
|
||||
* @param indelLen2 the length of the second indel allele
|
||||
* @param indelSeq1 the sequence for the first indel allele
|
||||
* @param indelSeq2 the sequence for the second indel allele
|
||||
*/
|
||||
VariableLengthCall( char refBase,
|
||||
long offset,
|
||||
int readDepth,
|
||||
|
|
@ -81,7 +96,7 @@ class VariableLengthCall extends GLFRecord {
|
|||
*
|
||||
* @param out the binary codec to write to
|
||||
*/
|
||||
public void write( BinaryCodec out ) {
|
||||
void write( BinaryCodec out ) {
|
||||
super.write(out);
|
||||
out.writeByte(lkHom1);
|
||||
out.writeByte(lkHom2);
|
||||
|
|
@ -101,8 +116,8 @@ class VariableLengthCall extends GLFRecord {
|
|||
return RECORD_TYPE.VARIABLE;
|
||||
}
|
||||
|
||||
/** @return */
|
||||
/** @return the size of the record, which is the size of our fields plus the generic records fields */
|
||||
public int getByteSize() {
|
||||
return size;
|
||||
return size + super.getByteSize();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,6 +86,34 @@ public class LikelihoodObjectTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultArrayValues() {
|
||||
mLO = new LikelihoodObject();
|
||||
short[] ret = mLO.toByteArray();
|
||||
for (int index = 0; index < ret.length; index++) {
|
||||
assertTrue(ret[index] == 255);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetMinimum() {
|
||||
int[] ray = new int[10];
|
||||
for (int x = 0; x < 10; x++) {
|
||||
ray[x] = ( 240 );
|
||||
}
|
||||
ray [5] = 0;
|
||||
mLO = new LikelihoodObject(ray);
|
||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||
short smallest = (short)mLO.getMinimumValue();
|
||||
assertTrue(smallest == 0);
|
||||
int index = 0;
|
||||
short[] ret = mLO.toByteArray();
|
||||
for (index = 0; index < ret.length; index++) {
|
||||
assertTrue(smallest <= ret[index]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSetLikelihood() {
|
||||
mLO = new LikelihoodObject();
|
||||
|
|
|
|||
Loading…
Reference in New Issue