diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java index d41688cc2..58bb4ec8b 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java @@ -161,9 +161,9 @@ public class LikelihoodObject { * * @return a float array containing our genotype likelihoods, as negitive log likelihoods */ - public float[] toFloatArray() { + public double[] toDoubleArray() { // make an array of floats - float[] ft = new float[10]; + double[] ft = new double[10]; int index = 0; for (GENOTYPE T : GENOTYPE.values()) { ft[index] = this.likelihood.get(T).floatValue(); @@ -178,18 +178,19 @@ public class LikelihoodObject { * @return a GenotypeLikelihoods object representing our data */ public GenotypeLikelihoods convert( SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase ) { - float[] ft = toFloatArray(); - + double[] ft = toDoubleArray(); + float[] db = new float[ft.length]; + int index = 0; if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) { - for (float f : ft) { - f = f * -1.0f; + for (;index < ft.length; index++) { + db[index] = ((float)ft[index] * -1.0f); } } else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) { - for (float f : ft) { - f = (float) Math.log(f); + for (;index < ft.length; index++) { + db[index] = (float) Math.log(ft[index]); } } - return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, ft); + return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, db); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java new file mode 100644 index 000000000..6d2ffd95f --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java @@ -0,0 +1,147 @@ +package org.broadinstitute.sting.utils.genotype.glf; + +import net.sf.samtools.util.BinaryCodec; +import net.sf.samtools.util.BlockCompressedOutputStream; + +import java.io.File; +import java.io.DataOutputStream; +import java.util.Iterator; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * an object for reading in GLF files + */ +public class GLFReader implements Iterator { + + // our next record + private GLFRecord nextRecord = null; + + // the glf magic number, which identifies a properly formatted GLF file + public static final short[] glfMagic = {'G', 'L', 'F', '\3'}; + + // our input codec + private final BinaryCodec inputBinaryCodec; + + // our header string + private String headerStr; + + // our reference name + private String referenceName; + + // reference length + private int referenceLength; + + GLFReader( File readFrom ) { + inputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(readFrom))); + inputBinaryCodec.setInputFileName(readFrom.getName()); + + // first verify that it's a valid GLF + for (short s: glfMagic) { + if (inputBinaryCodec.readUByte() != s) throw new StingException("Verification of GLF format failed: magic string doesn't match)"); + } + + // get the header string + headerStr = inputBinaryCodec.readLengthAndString(false); + + // get the reference name + referenceName = inputBinaryCodec.readLengthAndString(true); + + // get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it. + referenceLength = (int)inputBinaryCodec.readUInt(); + + // get the next record + nextRecord = next(); + } + + private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) { + int offset = (int)inputBinaryCodec.readUInt(); + long depth = inputBinaryCodec.readUInt(); + short min_lk = (short)((depth & 0x00000000ff000000) >> 24); + int readDepth = (int)(depth & 0x0000000000ffffff); + short rmsMapping = inputBinaryCodec.readUByte(); + double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length]; + for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { + lkValues[x] = inputBinaryCodec.readUByte(); + } + return new SinglePointCall(refBase,offset,readDepth,rmsMapping,lkValues); + } + + + private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) { + int offset = (int)inputBinaryCodec.readUInt(); + int depth = (int)inputBinaryCodec.readUInt(); + short min_lk = (short)((depth & 0x00000000ff000000) >> 24); + int readDepth = (depth & 0x0000000000ffffff); + short rmsMapping = inputBinaryCodec.readUByte(); + short lkHom1 = inputBinaryCodec.readUByte(); + short lkHom2 = inputBinaryCodec.readUByte(); + short lkHet = inputBinaryCodec.readUByte(); + int indelLen1 = (int)inputBinaryCodec.readShort(); + int indelLen2 = (int)inputBinaryCodec.readShort(); + short[] indelSeq1 = new short[indelLen1]; + short[] indelSeq2 = new short[indelLen2]; + for (int x = 0; x < indelLen1; x++) { + indelSeq1[x] = inputBinaryCodec.readUByte(); + } + for (int x = 0; x < indelLen2; x++) { + indelSeq2[x] = inputBinaryCodec.readUByte(); + } + return new VariableLengthCall(refBase,offset,readDepth,rmsMapping,lkHom1,lkHom2,lkHet,indelSeq1,indelSeq2); + } + + @Override + public boolean hasNext() { + return (nextRecord != null); + } + + @Override + public GLFRecord next() { + short firstBase = inputBinaryCodec.readUByte(); + byte recordType = (byte)(firstBase & 0x00f0 >> 4); + char refBase = (char)(firstBase & 0x000f); + + GLFRecord ret = nextRecord; + if (recordType == 1) { + nextRecord = generateSPC(refBase, inputBinaryCodec); + } + else if (recordType == 2) { + nextRecord = generateVLC(refBase, inputBinaryCodec); + } + else if (recordType == 0){ + nextRecord = null; + } + return ret; + } + + @Override + public void remove() { + throw new StingException("I'm Sorry Dave, I can't let you do that (also GLFReader doesn't support remove())."); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index 026140e43..e2d285de6 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; +import org.broadinstitute.sting.utils.StingException; /* @@ -216,5 +217,20 @@ abstract class GLFRecord { return (d > 255.0) ? (byte)255 : (byte)Math.round(d); } + /** + * find the minimum value in a set of doubles + * @param vals + * @return + */ + protected static double findMin(double vals[]) { + if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in"); + + double min = vals[0]; + for (double d: vals) { + if (d < min) min = d; + } + return min; + } + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 40b1b5194..31c158867 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -89,7 +89,7 @@ public class GLFWriter implements GenotypeWriter { SinglePointCall call = new SinglePointCall(refBase, genomicLoc, readDepth, (short) rmsMapQ, - lhValues); + lhValues.toDoubleArray()); call.write(this.outputBinaryCodec); } @@ -127,13 +127,10 @@ public class GLFWriter implements GenotypeWriter { VariableLengthCall call = new VariableLengthCall(refBase, genomicLoc, readDepth, - lowestLikelihood, (short) rmsMapQ, firstHomZyg.getLikelihood(), secondHomZyg.getLikelihood(), hetLikelihood, - firstHomZyg.getLengthOfIndel(), - secondHomZyg.getLengthOfIndel(), firstHomZyg.getIndelSequence(), secondHomZyg.getIndelSequence()); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java index 0ce49c176..a09633172 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; /* @@ -38,11 +37,8 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject; */ class SinglePointCall extends GLFRecord { - // our record type - private final RECORD_TYPE type = RECORD_TYPE.SINGLE; - // our likelihood object - private LikelihoodObject likelihoods; + private double likelihoods[]; /** * create a single @@ -51,25 +47,24 @@ class SinglePointCall extends GLFRecord { * @param offset the location, as an offset from the previous glf record * @param readDepth the read depth at the specified postion * @param rmsMapQ the root mean square of the mapping quality - * @param lhValues the LikelihoodObject, representing the genotype likelyhoods + * @param likelihoods the Likelihoods */ - SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) { - super(refBase, offset, (short) lhValues.getBestLikelihood(), readDepth, rmsMapQ); + SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[] ) { + super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ); - likelihoods = lhValues; + this.likelihoods = likelihoods; } /** * Write out the record to a binary codec * - * @param out + * @param out the codec to write to */ void write( BinaryCodec out ) { super.write(out); - short array[] = likelihoods.toByteArray(); - for (int x = 0; x < array.length; x++) { - out.writeUByte(array[x]); + for (double likelihood : likelihoods) { + out.writeUByte(GLFRecord.toCappedShort(likelihood)); } } @@ -88,7 +83,7 @@ class SinglePointCall extends GLFRecord { * @return number of bytes we represent */ public int getByteSize() { - return likelihoods.genoTypeCount + super.getByteSize(); + return likelihoods.length + super.getByteSize(); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java index 6c18a2cf4..b228dc800 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; - /* * Copyright (c) 2009 The Broad Institute * @@ -58,33 +57,27 @@ class VariableLengthCall extends GLFRecord { * @param offset the location, as an offset from the previous glf record * @param readDepth the read depth at the specified postion * @param rmsMapQ the root mean square of the mapping quality - * @param minimumLikelihood the minimum likelihood value * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255 - * @param indelLen1 the length of the first indel allele - * @param indelLen2 the length of the second indel allele * @param indelSeq1 the sequence for the first indel allele * @param indelSeq2 the sequence for the second indel allele */ VariableLengthCall( char refBase, long offset, int readDepth, - double minimumLikelihood, short rmsMapQ, double lkHom1, double lkHom2, double lkHet, - int indelLen1, - int indelLen2, final short indelSeq1[], final short indelSeq2[] ) { - super(refBase, offset, GLFRecord.toCappedShort(minimumLikelihood), readDepth, rmsMapQ); + super(refBase, offset, GLFRecord.toCappedShort(findMin(new double []{lkHom1,lkHom2,lkHet})), readDepth, rmsMapQ); this.lkHom1 = GLFRecord.toCappedShort(lkHom1); this.lkHom2 = GLFRecord.toCappedShort(lkHom2); this.lkHet = GLFRecord.toCappedShort(lkHet); - this.indelLen1 = indelLen1; - this.indelLen2 = indelLen2; + this.indelLen1 = indelSeq1.length; + this.indelLen2 = indelSeq2.length; this.indelSeq1 = indelSeq1; this.indelSeq2 = indelSeq2; size = 16 + indelSeq1.length + indelSeq2.length; @@ -103,11 +96,11 @@ class VariableLengthCall extends GLFRecord { out.writeByte(lkHet); out.writeShort(new Integer(indelLen1).shortValue()); out.writeShort(new Integer(indelLen2).shortValue()); - for (int x = 0; x < indelSeq1.length; x++) { - out.writeUByte(indelSeq1[x]); + for (short anIndelSeq1 : indelSeq1) { + out.writeUByte(anIndelSeq1); } - for (int x = 0; x < indelSeq2.length; x++) { - out.writeUByte(indelSeq2[x]); + for (short anIndelSeq2 : indelSeq2) { + out.writeUByte(anIndelSeq2); } } @@ -120,4 +113,5 @@ class VariableLengthCall extends GLFRecord { public int getByteSize() { return size + super.getByteSize(); } + }