diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
new file mode 100644
index 000000000..c9a031870
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java
@@ -0,0 +1,127 @@
+package org.broadinstitute.sting.gatk.refdata;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
+import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
+import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
+import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+
+
+/**
+ * @author aaron
+ *
+ * Class RodGLF
+ *
+ * the rod class for GLF data.
+ */
+public class RodGLF extends BasicReferenceOrderedDatum {
+ protected GLFRecord mRecord;
+ private GenomeLoc mLoc;
+ private static GLFRODIterator mWrap;
+ private String contig;
+ private int contigLength;
+
+ public RodGLF(String name) {
+ super(name);
+ }
+
+ @Override
+ public boolean parseLine(Object header, String[] parts) throws IOException {
+ return false; //To change body of implemented methods use File | Settings | File Templates.
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder builder = new StringBuilder();
+ builder.append(contig);
+ builder.append("\t");
+ builder.append(mRecord.getOffset());
+ builder.append("\t");
+ builder.append(mRecord.getRefBase());
+ builder.append("\t");
+ builder.append(mRecord.getReadDepth());
+ builder.append("\t");
+ builder.append(mRecord.getRmsMapQ());
+ builder.append("\t");
+ builder.append(mRecord.getMinimumLikelihood());
+
+ if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.SINGLE) {
+ for (double d : ((SinglePointCall) mRecord).getLikelihoods()) {
+ builder.append("\t");
+ builder.append(d);
+ }
+ } else if (mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) {
+ VariableLengthCall call = (VariableLengthCall) mRecord;
+ builder.append("\t");
+ builder.append(call.getLkHom1());
+ builder.append("\t");
+ builder.append(call.getLkHom2());
+ builder.append("\t");
+ builder.append(call.getLkHet());
+ builder.append("\t");
+ builder.append(call.getIndelLen1());
+ builder.append("\t");
+ builder.append(call.getIndelSeq1());
+ builder.append("\t");
+ builder.append(call.getIndelLen2());
+ builder.append("\t");
+ builder.append(call.getIndelSeq2());
+ }
+ return builder.toString();
+ }
+
+ @Override
+ public GenomeLoc getLocation() {
+ return mLoc;
+ }
+
+ public static Iterator createIterator(String name, File file) {
+ if (mWrap == null) {
+ return new RodGLF.GLFWrapper(name, file);
+ }
+ return mWrap;
+ }
+
+ static void setGLFWrapper(GLFRODIterator iter) {
+ mWrap = iter;
+ }
+
+ private static class GLFWrapper implements GLFRODIterator {
+ private GLFReader mReader;
+ private String mName;
+
+ public GLFWrapper(String name, File file) {
+ mName = name;
+ mReader = new GLFReader(file);
+ }
+
+ @Override
+ public boolean hasNext() {
+ return (mReader.hasNext());
+ }
+
+ @Override
+ public RodGLF next() {
+ RodGLF glf = new RodGLF(mName);
+ glf.mRecord = mReader.next();
+ glf.mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation());
+ glf.contig = mReader.getReferenceName();
+ glf.contigLength = mReader.getReferenceLength();
+ return glf;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("GLF Rods don't support the remove() function");
+ }
+ }
+}
+
+// for testing
+interface GLFRODIterator extends Iterator {
+}
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java
index 8e69af716..6ac926182 100644
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java
@@ -325,7 +325,11 @@ public class SingleSampleGenotyper extends LocusWalker {
// reference length
private int referenceLength;
+ // the current location, keeping track of the offsets
+ private int currentLocation = 1;
+
+ // we have this variable becuase there is no eof for glf's
+ private int lastRecordType = -1;
+
/**
* create a glf reader
*
* @param readFrom the file to read from
*/
- GLFReader(File readFrom) {
+ public GLFReader(File readFrom) {
try {
inputBinaryCodec = new BinaryCodec(new DataInputStream(new BlockCompressedInputStream(readFrom)));
} catch (IOException e) {
@@ -146,11 +153,15 @@ public class GLFReader implements Iterator {
@Override
public GLFRecord next() {
- short firstBase = inputBinaryCodec.readUByte();
+ GLFRecord ret = nextRecord;
+ short firstBase = protectedByteReadForFile();
+ if (firstBase == -1) return ret;
+
+ // parse out the record type and reference base
byte recordType = (byte) ((firstBase & 0x0f0) >> 4);
char refBase = (char) (firstBase & 0x000f);
+ lastRecordType = recordType;
- GLFRecord ret = nextRecord;
if (recordType == 1) {
nextRecord = generateSPC(refBase, inputBinaryCodec);
} else if (recordType == 2) {
@@ -159,14 +170,31 @@ public class GLFReader implements Iterator {
if (advanceContig()) {
return next();
}
- nextRecord = null;
+ //nextRecord = null;
} else {
throw new StingException("Unkonwn GLF record type (type = " + recordType + ")");
}
-
+ if (ret != null) currentLocation += ret.getOffset();
return ret;
}
+ /**
+ * read a short, and if we see an exception only throw it if it's unexpected (not after a zero)
+ * @return a short
+ */
+ private short protectedByteReadForFile() {
+ short st = -1;
+ try {
+ st = inputBinaryCodec.readUByte();
+ } catch (RuntimeEOFException exp) {
+ nextRecord = null;
+ if (lastRecordType != 0) {
+ throw exp; // if the last record was a zero, this is an ok condition. Otherwise throw an exception
+ }
+ }
+ return st;
+ }
+
/**
* advance to the next contig
*
@@ -181,22 +209,22 @@ public class GLFReader implements Iterator {
// get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it.
referenceLength = (int) inputBinaryCodec.readUInt();
//System.err.println(referenceName.length());
+ currentLocation = 1;
return true;
} catch (RuntimeException e) {
- e.printStackTrace();
- // we're out of file space, set the next to null
+ if (lastRecordType != 0) {
+ throw e; // if the last record was a zero, this is an ok condition. Otherwise throw an exception
+ }
nextRecord = null;
}
return false;
}
-
@Override
public void remove() {
throw new StingException("GLFReader doesn't support remove()");
}
-
/**
* getter methods
*/
@@ -212,4 +240,8 @@ public class GLFReader implements Iterator {
public String getHeaderStr() {
return headerStr;
}
+
+ public int getCurrentLocation() {
+ return currentLocation;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
index f107286aa..94017c8a7 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
@@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.StingException;
* which represents the accepted FASTA nucleotide symbols and their assocated GLF
* field values.
*/
-abstract class GLFRecord {
+public abstract class GLFRecord {
// fields common to all records
protected REF_BASE refBase;
@@ -110,7 +110,7 @@ abstract class GLFRecord {
}
/** the record type enum, which enumerates the different records we can have in a GLF */
- enum RECORD_TYPE {
+ public enum RECORD_TYPE {
SINGLE((short) 1),
VARIABLE((short) 2);
@@ -243,5 +243,24 @@ abstract class GLFRecord {
return (min > 255) ? 255 : (short)min;
}
+ public REF_BASE getRefBase() {
+ return refBase;
+ }
+
+ public long getOffset() {
+ return offset;
+ }
+
+ public short getMinimumLikelihood() {
+ return minimumLikelihood;
+ }
+
+ public int getReadDepth() {
+ return readDepth;
+ }
+
+ public short getRmsMapQ() {
+ return rmsMapQ;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
index a8fb075c5..03d9d34db 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
@@ -35,7 +35,7 @@ import net.sf.samtools.util.BinaryCodec;
*
* This class represents a single point geneotype call in GLF vernacular
*/
-class SinglePointCall extends GLFRecord {
+public class SinglePointCall extends GLFRecord {
// our likelihoods object
private double likelihoods[];
@@ -49,7 +49,7 @@ class SinglePointCall extends GLFRecord {
* @param rmsMapQ the root mean square of the mapping quality
* @param likelihoods the Likelihoods
*/
- SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) {
+ public SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) {
super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
this.likelihoods = likelihoods;
}
@@ -89,4 +89,7 @@ class SinglePointCall extends GLFRecord {
return likelihoods.length + super.getByteSize();
}
+ public double[] getLikelihoods() {
+ return likelihoods;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
index 57f6d3606..b2109cca4 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
@@ -36,7 +36,7 @@ import net.sf.samtools.util.BinaryCodec;
* Currently a lot of parameters need to be provided, but we may be able to thin
* those down as we understand what we have to specify and what we can infer.
*/
-class VariableLengthCall extends GLFRecord {
+public class VariableLengthCall extends GLFRecord {
// our fields, corresponding to the glf spec
private short lkHom1 = 0;
private short lkHom2 = 0;
@@ -63,7 +63,7 @@ class VariableLengthCall extends GLFRecord {
* @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele
*/
- VariableLengthCall(char refBase,
+ public VariableLengthCall(char refBase,
long offset,
int readDepth,
short rmsMapQ,
@@ -116,4 +116,31 @@ class VariableLengthCall extends GLFRecord {
return size + super.getByteSize();
}
+ public short getLkHom1() {
+ return lkHom1;
+ }
+
+ public short getLkHom2() {
+ return lkHom2;
+ }
+
+ public short getLkHet() {
+ return lkHet;
+ }
+
+ public short[] getIndelSeq1() {
+ return indelSeq1;
+ }
+
+ public short[] getIndelSeq2() {
+ return indelSeq2;
+ }
+
+ public int getIndelLen2() {
+ return indelLen2;
+ }
+
+ public int getIndelLen1() {
+ return indelLen1;
+ }
}
diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java
new file mode 100644
index 000000000..fd8b2e1c7
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodGLFTest.java
@@ -0,0 +1,107 @@
+package org.broadinstitute.sting.gatk.refdata;
+
+import net.sf.picard.reference.ReferenceSequenceFile;
+import net.sf.picard.reference.ReferenceSequenceFileFactory;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.Iterator;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: aaronmckenna
+ * Date: Jul 15, 2009
+ * Time: 12:18:50 AM
+ */
+public class RodGLFTest extends BaseTest {
+ static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
+ static final int finalRecordCount = 484140; // the number of records in the above file
+ static final int contigCount = 25;
+ static final String ref = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
+
+ @BeforeClass
+ public static void before() {
+ ReferenceSequenceFile r = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(ref));
+ GenomeLocParser.setupRefContigOrdering(r);
+ }
+
+ static class TestRODClass implements GLFRODIterator {
+ private int count = 0;
+ private int readCount = 0;
+
+ public TestRODClass(int count) {
+ this.count = count;
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (count <= 0) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public RodGLF next() {
+ RodGLF glf = new RodGLF("Test");
+ glf.mRecord = new SinglePointCall('A', readCount, 0, (short) 0, new double[]{0.0, 0.0});
+ --count;
+ readCount++;
+ return glf;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("BOO");
+ }
+ }
+
+ //@Test
+ public void testNext() {
+ RodGLF.setGLFWrapper(new RodGLFTest.TestRODClass(10));
+ Iterator iter = RodGLF.createIterator("test", new File(""));
+ int counter = 0;
+ while (iter.hasNext()) {
+ iter.next();
+ counter++;
+ }
+ assertEquals(10, counter);
+ }
+
+ @Test
+ public void testRodCount() {
+
+ Iterator iter = RodGLF.createIterator("test", glfFile);
+ int counter = 0;
+ while (iter.hasNext()) {
+ RodGLF glf = iter.next();
+ counter++;
+ }
+ assertEquals(finalRecordCount, counter);
+ }
+
+ @Test
+ public void testRodLocations() {
+
+ Iterator iter = RodGLF.createIterator("test", glfFile);
+ GenomeLoc loc = null;
+ while (iter.hasNext()) {
+ RodGLF glf = iter.next();
+ if (loc != null) {
+ if (glf.getLocation().isBefore(loc)) {
+ fail("locations in the GLF came out of order loc = " + loc.toString() + " new loc = " + glf.getLocation().toString());
+ }
+ }
+ loc = glf.getLocation();
+ }
+ }
+
+
+}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java
new file mode 100644
index 000000000..b0dd75c14
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFReaderTest.java
@@ -0,0 +1,51 @@
+package org.broadinstitute.sting.utils.genotype.glf;
+
+import org.broadinstitute.sting.BaseTest;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * @author aaron
+ *
+ * Class GLFReaderTest
+ *
+ * A descriptions should go here. Blame aaron if it's missing.
+ */
+public class GLFReaderTest extends BaseTest {
+
+
+ // our test file
+ static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
+ static final int finalRecordCount = 484140; // the number of records in the above file
+ static final int contigCount = 25;
+
+ /** read in the records from the file */
+ @Test
+ public void testReadRecords() {
+ int recCount = 0;
+ List contigs = new ArrayList();
+ try {
+ GLFReader reader = new GLFReader(glfFile);
+ long location = 1;
+ while (reader.hasNext()) {
+ GLFRecord rec = reader.next();
+ if (!contigs.contains(reader.getReferenceName())) {
+ contigs.add(reader.getReferenceName());
+ }
+ location = location + rec.offset;
+ //System.err.println("Record count = " + finalRecordCount + " offset " + rec.offset + " location = " + location + " type = " + rec.getRecordType());
+ ++recCount;
+ }
+ } catch (Exception e) {
+ System.err.println("Record count = " + recCount);
+ e.printStackTrace();
+ }
+ Assert.assertEquals(finalRecordCount, recCount);
+ Assert.assertEquals(contigCount, contigs.size());
+ }
+}