diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java index 282498f2e..ac54372fc 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import net.sf.samtools.SAMFileHeader; @@ -35,9 +34,9 @@ import java.io.File; /** * @author aaron * @version 1.0 - * - * Class GeliAdapter - * Adapts the Geli file writer to the Genotype writer interface + *

+ * Class GeliAdapter + * Adapts the Geli file writer to the Genotype writer interface */ public class GeliAdapter implements GenotypeWriter { @@ -47,30 +46,41 @@ public class GeliAdapter implements GenotypeWriter { /** * wrap a GeliFileWriter in the Genotype writer interface - * @param writeTo where to write to + * + * @param writeTo where to write to * @param fileHeader the file header to write out */ - public GeliAdapter( File writeTo, final SAMFileHeader fileHeader ) { - this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo,fileHeader); + public GeliAdapter(File writeTo, final SAMFileHeader fileHeader) { + this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); } /** * add a single point genotype call to the * + * @param contigName the name of the contig you're calling in + * @param contigLength the contig length * @param position the position on the contig * @param referenceBase the reference base * @param readDepth the read depth at the specified position * @param likelihoods the likelihoods of each of the possible alleles */ @Override - public void addGenotypeCall( int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods ) { - writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte)referenceBase)); + public void addGenotypeCall(String contigName, + int contigLength, + int position, + float rmsMapQuals, + char referenceBase, + int readDepth, + LikelihoodObject likelihoods) { + writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase)); } /** * add a variable length call to the genotyper * + * @param contigName the name of the contig you're calling in + * @param contigLength the contig length * @param position the position on the genome * @param rmsMapQuals the root mean square of the mapping qualities * @param readDepth the read depth @@ -80,7 +90,7 @@ public class GeliAdapter implements GenotypeWriter { * @param hetLikelihood the heterozygous likelihood */ @Override - public void addVariableLengthCall( int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood ) { + public void addVariableLengthCall(String contigName, int contigLength, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) { throw new UnsupportedOperationException("Geli format does not support variable length allele calls"); } @@ -91,7 +101,7 @@ public class GeliAdapter implements GenotypeWriter { * @param readDepth */ @Override - public void addNoCall( int position, int readDepth ) { + public void addNoCall(int position, int readDepth) { throw new UnsupportedOperationException("Geli format does not support no-calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 64b0af488..483754dd9 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -1,8 +1,5 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; - - /* * Copyright (c) 2009 The Broad Institute * @@ -29,22 +26,26 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject; */ /** - * - * @author aaron - * - * Class Genotype - * - * The interface for storing genotype calls. + * @author aaron + *

+ * Class Genotype + *

+ * The interface for storing genotype calls. */ public interface GenotypeWriter { /** * add a single point genotype call to the - * @param position the position on the contig + * + * @param contigName the name of the contig you're calling in + * @param contigLength the contig length + * @param position the position on the contig * @param referenceBase the reference base - * @param readDepth the read depth at the specified position - * @param likelihoods the likelihoods of each of the possible alleles + * @param readDepth the read depth at the specified position + * @param likelihoods the likelihoods of each of the possible alleles */ - public void addGenotypeCall(int position, + public void addGenotypeCall(String contigName, + int contigLength, + int position, float rmsMapQuals, char referenceBase, int readDepth, @@ -52,15 +53,20 @@ public interface GenotypeWriter { /** * add a variable length call to the genotyper - * @param position the position on the genome - * @param rmsMapQuals the root mean square of the mapping qualities - * @param readDepth the read depth - * @param refBase the reference base - * @param firstHomZyg the first homozygous indel - * @param secondHomZyg the second homozygous indel (if present, null if not) + * + * @param contigName the name of the contig you're calling in + * @param contigLength the contig length + * @param position the position on the genome + * @param rmsMapQuals the root mean square of the mapping qualities + * @param readDepth the read depth + * @param refBase the reference base + * @param firstHomZyg the first homozygous indel + * @param secondHomZyg the second homozygous indel (if present, null if not) * @param hetLikelihood the heterozygous likelihood */ - public void addVariableLengthCall(int position, + public void addVariableLengthCall(String contigName, + int contigLength, + int position, float rmsMapQuals, int readDepth, char refBase, @@ -70,15 +76,14 @@ public interface GenotypeWriter { /** * add a no call to the genotype file, if supported. + * * @param position * @param readDepth */ public void addNoCall(int position, int readDepth); - /** - * finish writing, closing any open files. - */ + /** finish writing, closing any open files. */ public void close(); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java index 6d2ffd95f..c27ea6965 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java @@ -1,15 +1,15 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; -import net.sf.samtools.util.BlockCompressedOutputStream; - -import java.io.File; -import java.io.DataOutputStream; -import java.util.Iterator; - +import net.sf.samtools.util.BlockCompressedInputStream; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import java.io.DataInputStream; +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + /* * Copyright (c) 2009 The Broad Institute * @@ -35,9 +35,7 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject; * OTHER DEALINGS IN THE SOFTWARE. */ -/** - * an object for reading in GLF files - */ +/** an object for reading in GLF files */ public class GLFReader implements Iterator { // our next record @@ -58,62 +56,87 @@ public class GLFReader implements Iterator { // reference length private int referenceLength; - GLFReader( File readFrom ) { - inputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(readFrom))); + /** + * create a glf reader + * + * @param readFrom the file to read from + */ + GLFReader(File readFrom) { + try { + inputBinaryCodec = new BinaryCodec(new DataInputStream(new BlockCompressedInputStream(readFrom))); + } catch (IOException e) { + throw new StingException("Unable to open " + readFrom.getName(), e); + } inputBinaryCodec.setInputFileName(readFrom.getName()); // first verify that it's a valid GLF - for (short s: glfMagic) { - if (inputBinaryCodec.readUByte() != s) throw new StingException("Verification of GLF format failed: magic string doesn't match)"); + for (short s : glfMagic) { + if (inputBinaryCodec.readUByte() != s) + throw new StingException("Verification of GLF format failed: magic string doesn't match)"); } // get the header string headerStr = inputBinaryCodec.readLengthAndString(false); - // get the reference name - referenceName = inputBinaryCodec.readLengthAndString(true); + if (advanceContig()) { + // setup the next record + next(); + } - // get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it. - referenceLength = (int)inputBinaryCodec.readUInt(); - - // get the next record - nextRecord = next(); } + /** + * read in a single point call + * + * @param refBase the reference base + * @param inputBinaryCodec the binary codec + * + * @return a single point call object + */ private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) { - int offset = (int)inputBinaryCodec.readUInt(); + int offset = (int) inputBinaryCodec.readUInt(); long depth = inputBinaryCodec.readUInt(); - short min_lk = (short)((depth & 0x00000000ff000000) >> 24); - int readDepth = (int)(depth & 0x0000000000ffffff); + short min_lk = (short) ((depth & 0x00000000ff000000) >> 24); + int readDepth = (int) (depth & 0x0000000000ffffff); short rmsMapping = inputBinaryCodec.readUByte(); double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length]; for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { lkValues[x] = inputBinaryCodec.readUByte(); } - return new SinglePointCall(refBase,offset,readDepth,rmsMapping,lkValues); + return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues); } - + /** + * read in a variable length call, and generate a VLC object from the data + * + * @param refBase the reference base + * @param inputBinaryCodec the input codex + * + * @return a VariableLengthCall object + */ private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) { - int offset = (int)inputBinaryCodec.readUInt(); - int depth = (int)inputBinaryCodec.readUInt(); - short min_lk = (short)((depth & 0x00000000ff000000) >> 24); + int offset = (int) inputBinaryCodec.readUInt(); + int depth = (int) inputBinaryCodec.readUInt(); + short min_lk = (short) ((depth & 0x00000000ff000000) >> 24); int readDepth = (depth & 0x0000000000ffffff); short rmsMapping = inputBinaryCodec.readUByte(); short lkHom1 = inputBinaryCodec.readUByte(); short lkHom2 = inputBinaryCodec.readUByte(); short lkHet = inputBinaryCodec.readUByte(); - int indelLen1 = (int)inputBinaryCodec.readShort(); - int indelLen2 = (int)inputBinaryCodec.readShort(); - short[] indelSeq1 = new short[indelLen1]; - short[] indelSeq2 = new short[indelLen2]; - for (int x = 0; x < indelLen1; x++) { + int indelLen1 = (int) inputBinaryCodec.readShort(); + int indelLen2 = (int) inputBinaryCodec.readShort(); + + int readCnt = Math.abs(indelLen1); + short indelSeq1[] = new short[readCnt]; + for (int x = 0; x < readCnt; x++) { indelSeq1[x] = inputBinaryCodec.readUByte(); } - for (int x = 0; x < indelLen2; x++) { - indelSeq2[x] = inputBinaryCodec.readUByte(); + readCnt = Math.abs(indelLen2); + short indelSeq2[] = new short[readCnt]; + for (int x = 0; x < readCnt; x++) { + indelSeq2[x] = inputBinaryCodec.readUByte(); } - return new VariableLengthCall(refBase,offset,readDepth,rmsMapping,lkHom1,lkHom2,lkHet,indelSeq1,indelSeq2); + return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2); } @Override @@ -124,24 +147,69 @@ public class GLFReader implements Iterator { @Override public GLFRecord next() { short firstBase = inputBinaryCodec.readUByte(); - byte recordType = (byte)(firstBase & 0x00f0 >> 4); - char refBase = (char)(firstBase & 0x000f); + byte recordType = (byte) ((firstBase & 0x0f0) >> 4); + char refBase = (char) (firstBase & 0x000f); GLFRecord ret = nextRecord; if (recordType == 1) { nextRecord = generateSPC(refBase, inputBinaryCodec); - } - else if (recordType == 2) { - nextRecord = generateVLC(refBase, inputBinaryCodec); - } - else if (recordType == 0){ + } else if (recordType == 2) { + nextRecord = generateVLC(refBase, inputBinaryCodec); + } else if (recordType == 0) { + if (advanceContig()) { + return next(); + } nextRecord = null; + } else { + throw new StingException("Unkonwn GLF record type (type = " + recordType + ")"); } + return ret; } + /** + * advance to the next contig + * + * @return true if we could advance + */ + private boolean advanceContig() { + // try to read the next sequence record + try { + // get the reference name + referenceName = inputBinaryCodec.readLengthAndString(true); + + // get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it. + referenceLength = (int) inputBinaryCodec.readUInt(); + //System.err.println(referenceName.length()); + return true; + } catch (RuntimeException e) { + e.printStackTrace(); + // we're out of file space, set the next to null + nextRecord = null; + } + return false; + } + + @Override public void remove() { - throw new StingException("I'm Sorry Dave, I can't let you do that (also GLFReader doesn't support remove())."); + throw new StingException("GLFReader doesn't support remove()"); + } + + + /** + * getter methods + */ + + public String getReferenceName() { + return referenceName; + } + + public int getReferenceLength() { + return referenceLength; + } + + public String getHeaderStr() { + return headerStr; } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index e2d285de6..e17d31eb3 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -74,9 +74,10 @@ abstract class GLFRecord { /** * private constructor, used by the enum class to makes each enum value + * * @param value the short values specified in the enum listing */ - REF_BASE( short value ) { + REF_BASE(short value) { fieldValue = value; } @@ -88,7 +89,11 @@ abstract class GLFRecord { * @return the corresponding REF_BASE * @throws IllegalArgumentException if the value passed can't be converted */ - public static REF_BASE toBase( char value ) { + public static REF_BASE toBase(char value) { + // for the case where they're passing in the enumated value + if (value <= 0x0F && value >= 0) { + return REF_BASE.values()[value]; + } String str = String.valueOf(value).toUpperCase(); for (int x = 0; x < REF_BASE.values().length; x++) { if (REF_BASE.values()[x].toString().equals(str)) { @@ -111,7 +116,7 @@ abstract class GLFRecord { private final short fieldValue; // in kilograms - RECORD_TYPE( short value ) { + RECORD_TYPE(short value) { fieldValue = value; } @@ -130,7 +135,7 @@ abstract class GLFRecord { * @param readDepth the read depth at this position * @param rmsMapQ the root mean square of the mapping quality */ - public GLFRecord( char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { + public GLFRecord(char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) { REF_BASE newBase = REF_BASE.toBase(base); validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ); } @@ -144,7 +149,7 @@ abstract class GLFRecord { * @param readDepth the read depth at this position * @param rmsMapQ the root mean square of the mapping quality */ - GLFRecord( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { + GLFRecord(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) { validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ); } @@ -157,7 +162,7 @@ abstract class GLFRecord { * @param readDepth the read depth at this position * @param rmsMapQ the root mean square of the mapping quality */ - private void validateInput( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { + private void validateInput(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) { this.refBase = base; if (offset > 4294967295L || offset < 0) { throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset); @@ -185,9 +190,10 @@ abstract class GLFRecord { * * @param out the binary codec to write to */ - void write( BinaryCodec out ) { - out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) )); - out.writeUInt(( (Long) offset ).intValue()); + void write(BinaryCodec out) { + short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f))); + out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f))); + out.writeUInt(((Long) offset).intValue()); out.writeUInt((new Long(readDepth).intValue())); out.writeUByte((short) rmsMapQ); } @@ -210,23 +216,27 @@ abstract class GLFRecord { /** * convert a double to a byte, capping it at the maximum value of 255 + * * @param d a double value - * @return a byte, capped at + * + * @return a byte, capped at */ protected static short toCappedShort(double d) { - return (d > 255.0) ? (byte)255 : (byte)Math.round(d); + return (d > 255.0) ? (short) 255 : (short) Math.round(d); } /** * find the minimum value in a set of doubles + * * @param vals + * * @return */ protected static double findMin(double vals[]) { if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in"); double min = vals[0]; - for (double d: vals) { + for (double d : vals) { if (d < min) min = d; } return min; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 31c158867..3efc3ba90 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -2,13 +2,12 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; - -import java.io.File; -import java.io.DataOutputStream; - import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; + +import java.io.DataOutputStream; +import java.io.File; /* * Copyright (c) 2009 The Broad Institute * @@ -51,19 +50,17 @@ public class GLFWriter implements GenotypeWriter { // our header text, reference sequence name (i.e. chr1), and it's length private String headerText = ""; - private String referenceSequenceName = ""; + private String referenceSequenceName = null; private long referenceSequenceLength = 0; /** * The public constructor for creating a GLF object * - * @param headerText the header text (currently unclear what the contents are) - * @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc + * @param headerText the header text (currently unclear what the contents are) + * @param writeTo the location to write to */ - public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) { + public GLFWriter(String headerText, File writeTo) { this.headerText = headerText; - this.referenceSequenceName = referenceSequenceName; - this.referenceSequenceLength = referenceSequenceLength; outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo))); outputBinaryCodec.setOutputFileName(writeTo.toString()); this.writeHeader(); @@ -72,21 +69,28 @@ public class GLFWriter implements GenotypeWriter { /** * add a point genotype to the GLF writer * - * @param refBase the reference base, as a char - * @param genomicLoc the location, as an offset from the previous glf record - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods + * @param contigName the name of the contig you're calling in + * @param contigLength the contig length + * @param refBase the reference base, as a char + * @param genomicLoc the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods */ @Override - public void addGenotypeCall( int genomicLoc, - float rmsMapQ, - char refBase, - int readDepth, - LikelihoodObject lhValues ) { + public void addGenotypeCall(String contigName, + int contigLength, + int genomicLoc, + float rmsMapQ, + char refBase, + int readDepth, + LikelihoodObject lhValues) { + // check if we've jumped to a new contig + checkSequence(contigName, contigLength); - SinglePointCall call = new SinglePointCall(refBase, genomicLoc, + SinglePointCall call = new SinglePointCall(refBase, + genomicLoc, readDepth, (short) rmsMapQ, lhValues.toDoubleArray()); @@ -96,6 +100,8 @@ public class GLFWriter implements GenotypeWriter { /** * add a variable length (indel, deletion, etc) to the genotype writer * + * @param contigName the name of the contig you're calling in + * @param contigLength the contig length * @param refBase the reference base * @param genomicLoc the location, as an offset from the previous glf record * @param readDepth the read depth at the specified postion @@ -105,23 +111,18 @@ public class GLFWriter implements GenotypeWriter { * @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255 */ @Override - public void addVariableLengthCall( int genomicLoc, - float rmsMapQ, - int readDepth, - char refBase, - IndelLikelihood firstHomZyg, - IndelLikelihood secondHomZyg, - byte hetLikelihood ) { + public void addVariableLengthCall(String contigName, + int contigLength, + int genomicLoc, + float rmsMapQ, + int readDepth, + char refBase, + IndelLikelihood firstHomZyg, + IndelLikelihood secondHomZyg, + byte hetLikelihood) { - // in this context, the minumum likelihood is lowest of the three options - double lowestLikelihood = Double.MAX_VALUE; - if (firstHomZyg.getLikelihood() < lowestLikelihood) { - lowestLikelihood = firstHomZyg.getLikelihood(); - } else if (secondHomZyg.getLikelihood() < lowestLikelihood) { - lowestLikelihood = secondHomZyg.getLikelihood(); - } else if (hetLikelihood < lowestLikelihood) { - lowestLikelihood = hetLikelihood; - } + // check if we've jumped to a new contig + checkSequence(contigName, contigLength); // normalize the two VariableLengthCall call = new VariableLengthCall(refBase, @@ -131,11 +132,12 @@ public class GLFWriter implements GenotypeWriter { firstHomZyg.getLikelihood(), secondHomZyg.getLikelihood(), hetLikelihood, + firstHomZyg.getLengthOfIndel(), firstHomZyg.getIndelSequence(), + secondHomZyg.getLengthOfIndel(), secondHomZyg.getIndelSequence()); call.write(this.outputBinaryCodec); - } /** @@ -145,7 +147,7 @@ public class GLFWriter implements GenotypeWriter { * @param readDepth the read depth */ @Override - public void addNoCall( int position, int readDepth ) { + public void addNoCall(int position, int readDepth) { // glf doesn't support this operation throw new UnsupportedOperationException("GLF doesn't support a 'no call' call."); } @@ -155,7 +157,8 @@ public class GLFWriter implements GenotypeWriter { * * @param rec the GLF record to write. */ - public void addGLFRecord( GLFRecord rec ) { + public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { + checkSequence(contigName,contigLength); rec.write(this.outputBinaryCodec); } @@ -167,24 +170,52 @@ public class GLFWriter implements GenotypeWriter { */ private void writeHeader() { for (int x = 0; x < glfMagic.length; x++) { - outputBinaryCodec.writeByte(glfMagic[x]); + outputBinaryCodec.writeUByte(glfMagic[x]); } - if (!( headerText.equals("") )) { + if (!(headerText.equals(""))) { outputBinaryCodec.writeString(headerText, true, true); } else { outputBinaryCodec.writeInt(0); } + } + + /** + * check to see if we've jumped to a new contig + * + * @param sequenceName + * @param seqLength + */ + private void checkSequence(String sequenceName, int seqLength) { + if ((referenceSequenceName == null) || (!referenceSequenceName.equals(sequenceName))) { + if (this.referenceSequenceName != null) { // don't write the record the first time + this.writeEndRecord(); + } + referenceSequenceName = sequenceName; + referenceSequenceLength = seqLength; + addSequence(); + } + } + + + /** add a sequence definition to the glf */ + private void addSequence() { outputBinaryCodec.writeString(referenceSequenceName, true, true); outputBinaryCodec.writeUInt(referenceSequenceLength); } + /** write end record */ + private void writeEndRecord() { + outputBinaryCodec.writeUByte((short) 0); + } + + /** * close the file. You must close the file to ensure any remaining data gets * written out. */ @Override public void close() { - outputBinaryCodec.writeByte((byte) 0); + writeEndRecord(); outputBinaryCodec.close(); } @@ -195,21 +226,31 @@ public class GLFWriter implements GenotypeWriter { * * @return a byte array containing the normalized values */ - private byte[] normalizeToByte( double[] values ) { + private byte[] normalizeToByte(double[] values) { byte ret[] = new byte[values.length]; double min = Double.MAX_VALUE; double max = Double.MIN_VALUE; for (double d : values) { - min = ( d < min ) ? d : min; - max = ( d > max ) ? d : max; + min = (d < min) ? d : min; + max = (d > max) ? d : max; } double scale = max / 255.0; for (int x = 0; x < values.length; x++) { - ret[x] = (byte) ( ( values[x] - min ) / scale ); + ret[x] = (byte) ((values[x] - min) / scale); } return ret; } + /** + * get the reference sequence + * + * @return + */ + public String getReferenceSequenceName() { + return referenceSequenceName; + } + + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java index a09633172..2574e2e1e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java @@ -43,15 +43,14 @@ class SinglePointCall extends GLFRecord { /** * create a single * - * @param refBase the reference base, as a char - * @param offset the location, as an offset from the previous glf record - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param likelihoods the Likelihoods + * @param refBase the reference base, as a char + * @param offset the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param likelihoods the Likelihoods */ - SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[] ) { + SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) { super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ); - this.likelihoods = likelihoods; } @@ -61,11 +60,15 @@ class SinglePointCall extends GLFRecord { * * @param out the codec to write to */ - void write( BinaryCodec out ) { + void write(BinaryCodec out) { super.write(out); + try { for (double likelihood : likelihoods) { out.writeUByte(GLFRecord.toCappedShort(likelihood)); } + } catch (Exception e) { + e.printStackTrace(); + } } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java index b228dc800..57f6d3606 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java @@ -53,31 +53,33 @@ class VariableLengthCall extends GLFRecord { /** * the default constructor * - * @param refBase the reference base - * @param offset the location, as an offset from the previous glf record - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 - * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 - * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255 - * @param indelSeq1 the sequence for the first indel allele - * @param indelSeq2 the sequence for the second indel allele + * @param refBase the reference base + * @param offset the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 + * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 + * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255 + * @param indelSeq1 the sequence for the first indel allele + * @param indelSeq2 the sequence for the second indel allele */ - VariableLengthCall( char refBase, - long offset, - int readDepth, - short rmsMapQ, - double lkHom1, - double lkHom2, - double lkHet, - final short indelSeq1[], - final short indelSeq2[] ) { - super(refBase, offset, GLFRecord.toCappedShort(findMin(new double []{lkHom1,lkHom2,lkHet})), readDepth, rmsMapQ); + VariableLengthCall(char refBase, + long offset, + int readDepth, + short rmsMapQ, + double lkHom1, + double lkHom2, + double lkHet, + int indelOneLength, + final short indelSeq1[], + int indelTwoLength, + final short indelSeq2[]) { + super(refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ); this.lkHom1 = GLFRecord.toCappedShort(lkHom1); this.lkHom2 = GLFRecord.toCappedShort(lkHom2); this.lkHet = GLFRecord.toCappedShort(lkHet); - this.indelLen1 = indelSeq1.length; - this.indelLen2 = indelSeq2.length; + this.indelLen1 = indelOneLength; + this.indelLen2 = indelTwoLength; this.indelSeq1 = indelSeq1; this.indelSeq2 = indelSeq2; size = 16 + indelSeq1.length + indelSeq2.length; @@ -89,7 +91,7 @@ class VariableLengthCall extends GLFRecord { * * @param out the binary codec to write to */ - void write( BinaryCodec out ) { + void write(BinaryCodec out) { super.write(out); out.writeByte(lkHom1); out.writeByte(lkHom2); @@ -109,7 +111,7 @@ class VariableLengthCall extends GLFRecord { return RECORD_TYPE.VARIABLE; } - /** @return the size of the record, which is the size of our fields plus the generic records fields */ + /** @return the size of the record, which is the size of our fields plus the generic records fields */ public int getByteSize() { return size + super.getByteSize(); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java index 6bb168cb0..d9a1b4d29 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.utils.genotype; -import org.junit.Test; +import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.junit.Test; import java.io.File; -import net.sf.samtools.SAMFileHeader; - /* * Copyright (c) 2009 The Broad Institute @@ -58,7 +56,7 @@ public class GeliAdapterTest extends BaseTest { SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10); adapter = new GeliAdapter(fl,header); LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods()); - adapter.addGenotypeCall(100,100,'A',100,obj); + adapter.addGenotypeCall("chr1",10,100,100,'A',100,obj); adapter.close(); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index 11f14bd71..ecd1df64b 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -1,11 +1,10 @@ package org.broadinstitute.sting.utils.genotype.glf; -import org.junit.Test; -import org.junit.Before; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.junit.Before; +import org.junit.Test; import java.io.File; @@ -88,7 +87,7 @@ public class GLFWriterTest extends BaseTest { @Test public void basicWrite() { - rec = new GLFWriter(header, referenceSequenceName, refLength, writeTo); + rec = new GLFWriter(header, writeTo); for (int x = 0; x < 100; x++) { addFakeSNP((int) Math.round(Math.random() * 9), 1); }