diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java
index 282498f2e..ac54372fc 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java
@@ -1,6 +1,5 @@
package org.broadinstitute.sting.utils.genotype;
-import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader;
@@ -35,9 +34,9 @@ import java.io.File;
/**
* @author aaron
* @version 1.0
- *
- * Class GeliAdapter
- * Adapts the Geli file writer to the Genotype writer interface
+ *
+ * Class GeliAdapter
+ * Adapts the Geli file writer to the Genotype writer interface
*/
public class GeliAdapter implements GenotypeWriter {
@@ -47,30 +46,41 @@ public class GeliAdapter implements GenotypeWriter {
/**
* wrap a GeliFileWriter in the Genotype writer interface
- * @param writeTo where to write to
+ *
+ * @param writeTo where to write to
* @param fileHeader the file header to write out
*/
- public GeliAdapter( File writeTo, final SAMFileHeader fileHeader ) {
- this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo,fileHeader);
+ public GeliAdapter(File writeTo, final SAMFileHeader fileHeader) {
+ this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
}
/**
* add a single point genotype call to the
*
+ * @param contigName the name of the contig you're calling in
+ * @param contigLength the contig length
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
*/
@Override
- public void addGenotypeCall( int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods ) {
- writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte)referenceBase));
+ public void addGenotypeCall(String contigName,
+ int contigLength,
+ int position,
+ float rmsMapQuals,
+ char referenceBase,
+ int readDepth,
+ LikelihoodObject likelihoods) {
+ writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase));
}
/**
* add a variable length call to the genotyper
*
+ * @param contigName the name of the contig you're calling in
+ * @param contigLength the contig length
* @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth
@@ -80,7 +90,7 @@ public class GeliAdapter implements GenotypeWriter {
* @param hetLikelihood the heterozygous likelihood
*/
@Override
- public void addVariableLengthCall( int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood ) {
+ public void addVariableLengthCall(String contigName, int contigLength, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
}
@@ -91,7 +101,7 @@ public class GeliAdapter implements GenotypeWriter {
* @param readDepth
*/
@Override
- public void addNoCall( int position, int readDepth ) {
+ public void addNoCall(int position, int readDepth) {
throw new UnsupportedOperationException("Geli format does not support no-calls");
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
index 64b0af488..483754dd9 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
@@ -1,8 +1,5 @@
package org.broadinstitute.sting.utils.genotype;
-import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
-
-
/*
* Copyright (c) 2009 The Broad Institute
*
@@ -29,22 +26,26 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
*/
/**
- *
- * @author aaron
- *
- * Class Genotype
- *
- * The interface for storing genotype calls.
+ * @author aaron
+ *
+ * Class Genotype
+ *
+ * The interface for storing genotype calls.
*/
public interface GenotypeWriter {
/**
* add a single point genotype call to the
- * @param position the position on the contig
+ *
+ * @param contigName the name of the contig you're calling in
+ * @param contigLength the contig length
+ * @param position the position on the contig
* @param referenceBase the reference base
- * @param readDepth the read depth at the specified position
- * @param likelihoods the likelihoods of each of the possible alleles
+ * @param readDepth the read depth at the specified position
+ * @param likelihoods the likelihoods of each of the possible alleles
*/
- public void addGenotypeCall(int position,
+ public void addGenotypeCall(String contigName,
+ int contigLength,
+ int position,
float rmsMapQuals,
char referenceBase,
int readDepth,
@@ -52,15 +53,20 @@ public interface GenotypeWriter {
/**
* add a variable length call to the genotyper
- * @param position the position on the genome
- * @param rmsMapQuals the root mean square of the mapping qualities
- * @param readDepth the read depth
- * @param refBase the reference base
- * @param firstHomZyg the first homozygous indel
- * @param secondHomZyg the second homozygous indel (if present, null if not)
+ *
+ * @param contigName the name of the contig you're calling in
+ * @param contigLength the contig length
+ * @param position the position on the genome
+ * @param rmsMapQuals the root mean square of the mapping qualities
+ * @param readDepth the read depth
+ * @param refBase the reference base
+ * @param firstHomZyg the first homozygous indel
+ * @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
- public void addVariableLengthCall(int position,
+ public void addVariableLengthCall(String contigName,
+ int contigLength,
+ int position,
float rmsMapQuals,
int readDepth,
char refBase,
@@ -70,15 +76,14 @@ public interface GenotypeWriter {
/**
* add a no call to the genotype file, if supported.
+ *
* @param position
* @param readDepth
*/
public void addNoCall(int position,
int readDepth);
- /**
- * finish writing, closing any open files.
- */
+ /** finish writing, closing any open files. */
public void close();
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java
index 6d2ffd95f..c27ea6965 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java
@@ -1,15 +1,15 @@
package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec;
-import net.sf.samtools.util.BlockCompressedOutputStream;
-
-import java.io.File;
-import java.io.DataOutputStream;
-import java.util.Iterator;
-
+import net.sf.samtools.util.BlockCompressedInputStream;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
/*
* Copyright (c) 2009 The Broad Institute
*
@@ -35,9 +35,7 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
* OTHER DEALINGS IN THE SOFTWARE.
*/
-/**
- * an object for reading in GLF files
- */
+/** an object for reading in GLF files */
public class GLFReader implements Iterator {
// our next record
@@ -58,62 +56,87 @@ public class GLFReader implements Iterator {
// reference length
private int referenceLength;
- GLFReader( File readFrom ) {
- inputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(readFrom)));
+ /**
+ * create a glf reader
+ *
+ * @param readFrom the file to read from
+ */
+ GLFReader(File readFrom) {
+ try {
+ inputBinaryCodec = new BinaryCodec(new DataInputStream(new BlockCompressedInputStream(readFrom)));
+ } catch (IOException e) {
+ throw new StingException("Unable to open " + readFrom.getName(), e);
+ }
inputBinaryCodec.setInputFileName(readFrom.getName());
// first verify that it's a valid GLF
- for (short s: glfMagic) {
- if (inputBinaryCodec.readUByte() != s) throw new StingException("Verification of GLF format failed: magic string doesn't match)");
+ for (short s : glfMagic) {
+ if (inputBinaryCodec.readUByte() != s)
+ throw new StingException("Verification of GLF format failed: magic string doesn't match)");
}
// get the header string
headerStr = inputBinaryCodec.readLengthAndString(false);
- // get the reference name
- referenceName = inputBinaryCodec.readLengthAndString(true);
+ if (advanceContig()) {
+ // setup the next record
+ next();
+ }
- // get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it.
- referenceLength = (int)inputBinaryCodec.readUInt();
-
- // get the next record
- nextRecord = next();
}
+ /**
+ * read in a single point call
+ *
+ * @param refBase the reference base
+ * @param inputBinaryCodec the binary codec
+ *
+ * @return a single point call object
+ */
private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) {
- int offset = (int)inputBinaryCodec.readUInt();
+ int offset = (int) inputBinaryCodec.readUInt();
long depth = inputBinaryCodec.readUInt();
- short min_lk = (short)((depth & 0x00000000ff000000) >> 24);
- int readDepth = (int)(depth & 0x0000000000ffffff);
+ short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
+ int readDepth = (int) (depth & 0x0000000000ffffff);
short rmsMapping = inputBinaryCodec.readUByte();
double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length];
for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) {
lkValues[x] = inputBinaryCodec.readUByte();
}
- return new SinglePointCall(refBase,offset,readDepth,rmsMapping,lkValues);
+ return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues);
}
-
+ /**
+ * read in a variable length call, and generate a VLC object from the data
+ *
+ * @param refBase the reference base
+ * @param inputBinaryCodec the input codex
+ *
+ * @return a VariableLengthCall object
+ */
private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) {
- int offset = (int)inputBinaryCodec.readUInt();
- int depth = (int)inputBinaryCodec.readUInt();
- short min_lk = (short)((depth & 0x00000000ff000000) >> 24);
+ int offset = (int) inputBinaryCodec.readUInt();
+ int depth = (int) inputBinaryCodec.readUInt();
+ short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
int readDepth = (depth & 0x0000000000ffffff);
short rmsMapping = inputBinaryCodec.readUByte();
short lkHom1 = inputBinaryCodec.readUByte();
short lkHom2 = inputBinaryCodec.readUByte();
short lkHet = inputBinaryCodec.readUByte();
- int indelLen1 = (int)inputBinaryCodec.readShort();
- int indelLen2 = (int)inputBinaryCodec.readShort();
- short[] indelSeq1 = new short[indelLen1];
- short[] indelSeq2 = new short[indelLen2];
- for (int x = 0; x < indelLen1; x++) {
+ int indelLen1 = (int) inputBinaryCodec.readShort();
+ int indelLen2 = (int) inputBinaryCodec.readShort();
+
+ int readCnt = Math.abs(indelLen1);
+ short indelSeq1[] = new short[readCnt];
+ for (int x = 0; x < readCnt; x++) {
indelSeq1[x] = inputBinaryCodec.readUByte();
}
- for (int x = 0; x < indelLen2; x++) {
- indelSeq2[x] = inputBinaryCodec.readUByte();
+ readCnt = Math.abs(indelLen2);
+ short indelSeq2[] = new short[readCnt];
+ for (int x = 0; x < readCnt; x++) {
+ indelSeq2[x] = inputBinaryCodec.readUByte();
}
- return new VariableLengthCall(refBase,offset,readDepth,rmsMapping,lkHom1,lkHom2,lkHet,indelSeq1,indelSeq2);
+ return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2);
}
@Override
@@ -124,24 +147,69 @@ public class GLFReader implements Iterator {
@Override
public GLFRecord next() {
short firstBase = inputBinaryCodec.readUByte();
- byte recordType = (byte)(firstBase & 0x00f0 >> 4);
- char refBase = (char)(firstBase & 0x000f);
+ byte recordType = (byte) ((firstBase & 0x0f0) >> 4);
+ char refBase = (char) (firstBase & 0x000f);
GLFRecord ret = nextRecord;
if (recordType == 1) {
nextRecord = generateSPC(refBase, inputBinaryCodec);
- }
- else if (recordType == 2) {
- nextRecord = generateVLC(refBase, inputBinaryCodec);
- }
- else if (recordType == 0){
+ } else if (recordType == 2) {
+ nextRecord = generateVLC(refBase, inputBinaryCodec);
+ } else if (recordType == 0) {
+ if (advanceContig()) {
+ return next();
+ }
nextRecord = null;
+ } else {
+ throw new StingException("Unkonwn GLF record type (type = " + recordType + ")");
}
+
return ret;
}
+ /**
+ * advance to the next contig
+ *
+ * @return true if we could advance
+ */
+ private boolean advanceContig() {
+ // try to read the next sequence record
+ try {
+ // get the reference name
+ referenceName = inputBinaryCodec.readLengthAndString(true);
+
+ // get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it.
+ referenceLength = (int) inputBinaryCodec.readUInt();
+ //System.err.println(referenceName.length());
+ return true;
+ } catch (RuntimeException e) {
+ e.printStackTrace();
+ // we're out of file space, set the next to null
+ nextRecord = null;
+ }
+ return false;
+ }
+
+
@Override
public void remove() {
- throw new StingException("I'm Sorry Dave, I can't let you do that (also GLFReader doesn't support remove()).");
+ throw new StingException("GLFReader doesn't support remove()");
+ }
+
+
+ /**
+ * getter methods
+ */
+
+ public String getReferenceName() {
+ return referenceName;
+ }
+
+ public int getReferenceLength() {
+ return referenceLength;
+ }
+
+ public String getHeaderStr() {
+ return headerStr;
}
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
index e2d285de6..e17d31eb3 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
@@ -74,9 +74,10 @@ abstract class GLFRecord {
/**
* private constructor, used by the enum class to makes each enum value
+ *
* @param value the short values specified in the enum listing
*/
- REF_BASE( short value ) {
+ REF_BASE(short value) {
fieldValue = value;
}
@@ -88,7 +89,11 @@ abstract class GLFRecord {
* @return the corresponding REF_BASE
* @throws IllegalArgumentException if the value passed can't be converted
*/
- public static REF_BASE toBase( char value ) {
+ public static REF_BASE toBase(char value) {
+ // for the case where they're passing in the enumated value
+ if (value <= 0x0F && value >= 0) {
+ return REF_BASE.values()[value];
+ }
String str = String.valueOf(value).toUpperCase();
for (int x = 0; x < REF_BASE.values().length; x++) {
if (REF_BASE.values()[x].toString().equals(str)) {
@@ -111,7 +116,7 @@ abstract class GLFRecord {
private final short fieldValue; // in kilograms
- RECORD_TYPE( short value ) {
+ RECORD_TYPE(short value) {
fieldValue = value;
}
@@ -130,7 +135,7 @@ abstract class GLFRecord {
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
- public GLFRecord( char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
+ public GLFRecord(char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
REF_BASE newBase = REF_BASE.toBase(base);
validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ);
}
@@ -144,7 +149,7 @@ abstract class GLFRecord {
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
- GLFRecord( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
+ GLFRecord(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ);
}
@@ -157,7 +162,7 @@ abstract class GLFRecord {
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
- private void validateInput( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) {
+ private void validateInput(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
this.refBase = base;
if (offset > 4294967295L || offset < 0) {
throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset);
@@ -185,9 +190,10 @@ abstract class GLFRecord {
*
* @param out the binary codec to write to
*/
- void write( BinaryCodec out ) {
- out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) ));
- out.writeUInt(( (Long) offset ).intValue());
+ void write(BinaryCodec out) {
+ short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
+ out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
+ out.writeUInt(((Long) offset).intValue());
out.writeUInt((new Long(readDepth).intValue()));
out.writeUByte((short) rmsMapQ);
}
@@ -210,23 +216,27 @@ abstract class GLFRecord {
/**
* convert a double to a byte, capping it at the maximum value of 255
+ *
* @param d a double value
- * @return a byte, capped at
+ *
+ * @return a byte, capped at
*/
protected static short toCappedShort(double d) {
- return (d > 255.0) ? (byte)255 : (byte)Math.round(d);
+ return (d > 255.0) ? (short) 255 : (short) Math.round(d);
}
/**
* find the minimum value in a set of doubles
+ *
* @param vals
+ *
* @return
*/
protected static double findMin(double vals[]) {
if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in");
double min = vals[0];
- for (double d: vals) {
+ for (double d : vals) {
if (d < min) min = d;
}
return min;
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
index 31c158867..3efc3ba90 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
@@ -2,13 +2,12 @@ package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
-
-import java.io.File;
-import java.io.DataOutputStream;
-
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+
+import java.io.DataOutputStream;
+import java.io.File;
/*
* Copyright (c) 2009 The Broad Institute
*
@@ -51,19 +50,17 @@ public class GLFWriter implements GenotypeWriter {
// our header text, reference sequence name (i.e. chr1), and it's length
private String headerText = "";
- private String referenceSequenceName = "";
+ private String referenceSequenceName = null;
private long referenceSequenceLength = 0;
/**
* The public constructor for creating a GLF object
*
- * @param headerText the header text (currently unclear what the contents are)
- * @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc
+ * @param headerText the header text (currently unclear what the contents are)
+ * @param writeTo the location to write to
*/
- public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) {
+ public GLFWriter(String headerText, File writeTo) {
this.headerText = headerText;
- this.referenceSequenceName = referenceSequenceName;
- this.referenceSequenceLength = referenceSequenceLength;
outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo)));
outputBinaryCodec.setOutputFileName(writeTo.toString());
this.writeHeader();
@@ -72,21 +69,28 @@ public class GLFWriter implements GenotypeWriter {
/**
* add a point genotype to the GLF writer
*
- * @param refBase the reference base, as a char
- * @param genomicLoc the location, as an offset from the previous glf record
- * @param readDepth the read depth at the specified postion
- * @param rmsMapQ the root mean square of the mapping quality
- * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
+ * @param contigName the name of the contig you're calling in
+ * @param contigLength the contig length
+ * @param refBase the reference base, as a char
+ * @param genomicLoc the location, as an offset from the previous glf record
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
*/
@Override
- public void addGenotypeCall( int genomicLoc,
- float rmsMapQ,
- char refBase,
- int readDepth,
- LikelihoodObject lhValues ) {
+ public void addGenotypeCall(String contigName,
+ int contigLength,
+ int genomicLoc,
+ float rmsMapQ,
+ char refBase,
+ int readDepth,
+ LikelihoodObject lhValues) {
+ // check if we've jumped to a new contig
+ checkSequence(contigName, contigLength);
- SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
+ SinglePointCall call = new SinglePointCall(refBase,
+ genomicLoc,
readDepth,
(short) rmsMapQ,
lhValues.toDoubleArray());
@@ -96,6 +100,8 @@ public class GLFWriter implements GenotypeWriter {
/**
* add a variable length (indel, deletion, etc) to the genotype writer
*
+ * @param contigName the name of the contig you're calling in
+ * @param contigLength the contig length
* @param refBase the reference base
* @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
@@ -105,23 +111,18 @@ public class GLFWriter implements GenotypeWriter {
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
*/
@Override
- public void addVariableLengthCall( int genomicLoc,
- float rmsMapQ,
- int readDepth,
- char refBase,
- IndelLikelihood firstHomZyg,
- IndelLikelihood secondHomZyg,
- byte hetLikelihood ) {
+ public void addVariableLengthCall(String contigName,
+ int contigLength,
+ int genomicLoc,
+ float rmsMapQ,
+ int readDepth,
+ char refBase,
+ IndelLikelihood firstHomZyg,
+ IndelLikelihood secondHomZyg,
+ byte hetLikelihood) {
- // in this context, the minumum likelihood is lowest of the three options
- double lowestLikelihood = Double.MAX_VALUE;
- if (firstHomZyg.getLikelihood() < lowestLikelihood) {
- lowestLikelihood = firstHomZyg.getLikelihood();
- } else if (secondHomZyg.getLikelihood() < lowestLikelihood) {
- lowestLikelihood = secondHomZyg.getLikelihood();
- } else if (hetLikelihood < lowestLikelihood) {
- lowestLikelihood = hetLikelihood;
- }
+ // check if we've jumped to a new contig
+ checkSequence(contigName, contigLength);
// normalize the two
VariableLengthCall call = new VariableLengthCall(refBase,
@@ -131,11 +132,12 @@ public class GLFWriter implements GenotypeWriter {
firstHomZyg.getLikelihood(),
secondHomZyg.getLikelihood(),
hetLikelihood,
+ firstHomZyg.getLengthOfIndel(),
firstHomZyg.getIndelSequence(),
+ secondHomZyg.getLengthOfIndel(),
secondHomZyg.getIndelSequence());
call.write(this.outputBinaryCodec);
-
}
/**
@@ -145,7 +147,7 @@ public class GLFWriter implements GenotypeWriter {
* @param readDepth the read depth
*/
@Override
- public void addNoCall( int position, int readDepth ) {
+ public void addNoCall(int position, int readDepth) {
// glf doesn't support this operation
throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
}
@@ -155,7 +157,8 @@ public class GLFWriter implements GenotypeWriter {
*
* @param rec the GLF record to write.
*/
- public void addGLFRecord( GLFRecord rec ) {
+ public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
+ checkSequence(contigName,contigLength);
rec.write(this.outputBinaryCodec);
}
@@ -167,24 +170,52 @@ public class GLFWriter implements GenotypeWriter {
*/
private void writeHeader() {
for (int x = 0; x < glfMagic.length; x++) {
- outputBinaryCodec.writeByte(glfMagic[x]);
+ outputBinaryCodec.writeUByte(glfMagic[x]);
}
- if (!( headerText.equals("") )) {
+ if (!(headerText.equals(""))) {
outputBinaryCodec.writeString(headerText, true, true);
} else {
outputBinaryCodec.writeInt(0);
}
+ }
+
+ /**
+ * check to see if we've jumped to a new contig
+ *
+ * @param sequenceName
+ * @param seqLength
+ */
+ private void checkSequence(String sequenceName, int seqLength) {
+ if ((referenceSequenceName == null) || (!referenceSequenceName.equals(sequenceName))) {
+ if (this.referenceSequenceName != null) { // don't write the record the first time
+ this.writeEndRecord();
+ }
+ referenceSequenceName = sequenceName;
+ referenceSequenceLength = seqLength;
+ addSequence();
+ }
+ }
+
+
+ /** add a sequence definition to the glf */
+ private void addSequence() {
outputBinaryCodec.writeString(referenceSequenceName, true, true);
outputBinaryCodec.writeUInt(referenceSequenceLength);
}
+ /** write end record */
+ private void writeEndRecord() {
+ outputBinaryCodec.writeUByte((short) 0);
+ }
+
+
/**
* close the file. You must close the file to ensure any remaining data gets
* written out.
*/
@Override
public void close() {
- outputBinaryCodec.writeByte((byte) 0);
+ writeEndRecord();
outputBinaryCodec.close();
}
@@ -195,21 +226,31 @@ public class GLFWriter implements GenotypeWriter {
*
* @return a byte array containing the normalized values
*/
- private byte[] normalizeToByte( double[] values ) {
+ private byte[] normalizeToByte(double[] values) {
byte ret[] = new byte[values.length];
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
for (double d : values) {
- min = ( d < min ) ? d : min;
- max = ( d > max ) ? d : max;
+ min = (d < min) ? d : min;
+ max = (d > max) ? d : max;
}
double scale = max / 255.0;
for (int x = 0; x < values.length; x++) {
- ret[x] = (byte) ( ( values[x] - min ) / scale );
+ ret[x] = (byte) ((values[x] - min) / scale);
}
return ret;
}
+ /**
+ * get the reference sequence
+ *
+ * @return
+ */
+ public String getReferenceSequenceName() {
+ return referenceSequenceName;
+ }
+
+
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
index a09633172..2574e2e1e 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
@@ -43,15 +43,14 @@ class SinglePointCall extends GLFRecord {
/**
* create a single
*
- * @param refBase the reference base, as a char
- * @param offset the location, as an offset from the previous glf record
- * @param readDepth the read depth at the specified postion
- * @param rmsMapQ the root mean square of the mapping quality
- * @param likelihoods the Likelihoods
+ * @param refBase the reference base, as a char
+ * @param offset the location, as an offset from the previous glf record
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param likelihoods the Likelihoods
*/
- SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[] ) {
+ SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) {
super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
-
this.likelihoods = likelihoods;
}
@@ -61,11 +60,15 @@ class SinglePointCall extends GLFRecord {
*
* @param out the codec to write to
*/
- void write( BinaryCodec out ) {
+ void write(BinaryCodec out) {
super.write(out);
+ try {
for (double likelihood : likelihoods) {
out.writeUByte(GLFRecord.toCappedShort(likelihood));
}
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
}
/**
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
index b228dc800..57f6d3606 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
@@ -53,31 +53,33 @@ class VariableLengthCall extends GLFRecord {
/**
* the default constructor
*
- * @param refBase the reference base
- * @param offset the location, as an offset from the previous glf record
- * @param readDepth the read depth at the specified postion
- * @param rmsMapQ the root mean square of the mapping quality
- * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
- * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
- * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
- * @param indelSeq1 the sequence for the first indel allele
- * @param indelSeq2 the sequence for the second indel allele
+ * @param refBase the reference base
+ * @param offset the location, as an offset from the previous glf record
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
+ * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
+ * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
+ * @param indelSeq1 the sequence for the first indel allele
+ * @param indelSeq2 the sequence for the second indel allele
*/
- VariableLengthCall( char refBase,
- long offset,
- int readDepth,
- short rmsMapQ,
- double lkHom1,
- double lkHom2,
- double lkHet,
- final short indelSeq1[],
- final short indelSeq2[] ) {
- super(refBase, offset, GLFRecord.toCappedShort(findMin(new double []{lkHom1,lkHom2,lkHet})), readDepth, rmsMapQ);
+ VariableLengthCall(char refBase,
+ long offset,
+ int readDepth,
+ short rmsMapQ,
+ double lkHom1,
+ double lkHom2,
+ double lkHet,
+ int indelOneLength,
+ final short indelSeq1[],
+ int indelTwoLength,
+ final short indelSeq2[]) {
+ super(refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
this.lkHet = GLFRecord.toCappedShort(lkHet);
- this.indelLen1 = indelSeq1.length;
- this.indelLen2 = indelSeq2.length;
+ this.indelLen1 = indelOneLength;
+ this.indelLen2 = indelTwoLength;
this.indelSeq1 = indelSeq1;
this.indelSeq2 = indelSeq2;
size = 16 + indelSeq1.length + indelSeq2.length;
@@ -89,7 +91,7 @@ class VariableLengthCall extends GLFRecord {
*
* @param out the binary codec to write to
*/
- void write( BinaryCodec out ) {
+ void write(BinaryCodec out) {
super.write(out);
out.writeByte(lkHom1);
out.writeByte(lkHom2);
@@ -109,7 +111,7 @@ class VariableLengthCall extends GLFRecord {
return RECORD_TYPE.VARIABLE;
}
- /** @return the size of the record, which is the size of our fields plus the generic records fields */
+ /** @return the size of the record, which is the size of our fields plus the generic records fields */
public int getByteSize() {
return size + super.getByteSize();
}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java
index 6bb168cb0..d9a1b4d29 100644
--- a/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java
@@ -1,14 +1,12 @@
package org.broadinstitute.sting.utils.genotype;
-import org.junit.Test;
+import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+import org.junit.Test;
import java.io.File;
-import net.sf.samtools.SAMFileHeader;
-
/*
* Copyright (c) 2009 The Broad Institute
@@ -58,7 +56,7 @@ public class GeliAdapterTest extends BaseTest {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
adapter = new GeliAdapter(fl,header);
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods());
- adapter.addGenotypeCall(100,100,'A',100,obj);
+ adapter.addGenotypeCall("chr1",10,100,100,'A',100,obj);
adapter.close();
}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java
index 11f14bd71..ecd1df64b 100755
--- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java
@@ -1,11 +1,10 @@
package org.broadinstitute.sting.utils.genotype.glf;
-import org.junit.Test;
-import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
-import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+import org.junit.Before;
+import org.junit.Test;
import java.io.File;
@@ -88,7 +87,7 @@ public class GLFWriterTest extends BaseTest {
@Test
public void basicWrite() {
- rec = new GLFWriter(header, referenceSequenceName, refLength, writeTo);
+ rec = new GLFWriter(header, writeTo);
for (int x = 0; x < 100; x++) {
addFakeSNP((int) Math.round(Math.random() * 9), 1);
}