Changed the GLF record to store it's contig name and position in each record instead of in the Reader. Integration tests all stay the same.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2410 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-12-18 22:54:56 +00:00
parent 80b3eb85fa
commit 7e0f69dab5
11 changed files with 189 additions and 136 deletions

View File

@ -93,10 +93,8 @@ public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWr
else if ( targetStream instanceof GLFWriter ) { else if ( targetStream instanceof GLFWriter ) {
GLFReader reader = new GLFReader(file); GLFReader reader = new GLFReader(file);
while ( reader.hasNext() ) { while ( reader.hasNext() ) {
// TODO -- Find out from Aaron if this is correct. Looking through the code, GLFRecord rec = reader.next();
// TODO -- it looks like this will exhibit the correct behavior - but it feels ((GLFWriter)targetStream).addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
// TODO -- wrong that we get the contig/length of the record before we call next()
((GLFWriter)targetStream).addGLFRecord(reader.getReferenceName(), reader.getReferenceLength(), reader.next());
} }
reader.close(); reader.close();
} }

View File

@ -7,8 +7,8 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader; import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall; import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall; import org.broadinstitute.sting.utils.genotype.glf.GLFVariableLengthCall;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -27,7 +27,6 @@ import java.util.NoSuchElementException;
* the rod class for GLF data. * the rod class for GLF data.
*/ */
public class RodGLF implements VariationRod, Iterator<RodGLF> { public class RodGLF implements VariationRod, Iterator<RodGLF> {
static int count = 0;
public GLFReader mReader; public GLFReader mReader;
private final String mName; private final String mName;
private GenomeLoc mLoc; private GenomeLoc mLoc;
@ -75,16 +74,16 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
mRecord.getReadDepth(), mRecord.getReadDepth(),
mRecord.getRmsMapQ(), mRecord.getRmsMapQ(),
getBestGenotypeValue(1), getBestGenotypeValue(1),
((SinglePointCall) mRecord).getLikelihoods()[0], ((GLFSingleCall) mRecord).getLikelihoods()[0],
((SinglePointCall) mRecord).getLikelihoods()[1], ((GLFSingleCall) mRecord).getLikelihoods()[1],
((SinglePointCall) mRecord).getLikelihoods()[2], ((GLFSingleCall) mRecord).getLikelihoods()[2],
((SinglePointCall) mRecord).getLikelihoods()[3], ((GLFSingleCall) mRecord).getLikelihoods()[3],
((SinglePointCall) mRecord).getLikelihoods()[4], ((GLFSingleCall) mRecord).getLikelihoods()[4],
((SinglePointCall) mRecord).getLikelihoods()[5], ((GLFSingleCall) mRecord).getLikelihoods()[5],
((SinglePointCall) mRecord).getLikelihoods()[6], ((GLFSingleCall) mRecord).getLikelihoods()[6],
((SinglePointCall) mRecord).getLikelihoods()[7], ((GLFSingleCall) mRecord).getLikelihoods()[7],
((SinglePointCall) mRecord).getLikelihoods()[8], ((GLFSingleCall) mRecord).getLikelihoods()[8],
((SinglePointCall) mRecord).getLikelihoods()[9] ((GLFSingleCall) mRecord).getLikelihoods()[9]
); );
@ -209,7 +208,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
* @return a GENOTYPE object representing the nth best genotype * @return a GENOTYPE object representing the nth best genotype
*/ */
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) { public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods()); Integer[] sorted = Utils.SortPermutation(((GLFSingleCall) mRecord).getLikelihoods());
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]]; return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
} }
@ -222,8 +221,8 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
* @return a GENOTYPE object representing the nth best genotype * @return a GENOTYPE object representing the nth best genotype
*/ */
public double getBestGenotypeValue(int nthBest) { public double getBestGenotypeValue(int nthBest) {
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods()); Integer[] sorted = Utils.SortPermutation(((GLFSingleCall) mRecord).getLikelihoods());
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]]; return (((GLFSingleCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
} }
/** /**
@ -235,7 +234,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override @Override
public boolean isInsertion() { public boolean isInsertion() {
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) && return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
((VariableLengthCall) mRecord).getIndelLen1() > 0); ((GLFVariableLengthCall) mRecord).getIndelLen1() > 0);
} }
/** /**
@ -247,7 +246,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override @Override
public boolean isDeletion() { public boolean isDeletion() {
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) && return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
((VariableLengthCall) mRecord).getIndelLen1() < 0); ((GLFVariableLengthCall) mRecord).getIndelLen1() < 0);
} }
/** /**
@ -283,7 +282,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
if (g.toString().equals(ref)) break; if (g.toString().equals(ref)) break;
index++; index++;
} }
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR; return Math.abs(getBestGenotypeValue(1) - ((GLFSingleCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
} }
/** /**
@ -358,7 +357,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
public RodGLF next() { public RodGLF next() {
if (!this.hasNext()) throw new NoSuchElementException("RodGLF next called on iterator with no more elements"); if (!this.hasNext()) throw new NoSuchElementException("RodGLF next called on iterator with no more elements");
mRecord = mReader.next(); mRecord = mReader.next();
mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation(), mReader.getCurrentLocation()); mLoc = GenomeLocParser.createGenomeLoc(mRecord.getContig(), mRecord.getPosition(), mRecord.getPosition());
return this; return this;
} }

View File

@ -58,7 +58,7 @@ public class GLFReader implements Iterator<GLFRecord> {
private int referenceLength; private int referenceLength;
// the current location, keeping track of the offsets // the current location, keeping track of the offsets
private int currentLocation = 1; private long currentLocation = 1;
// we have this variable becuase there is no eof for glf's // we have this variable becuase there is no eof for glf's
private int lastRecordType = -1; private int lastRecordType = -1;
@ -100,7 +100,7 @@ public class GLFReader implements Iterator<GLFRecord> {
* *
* @return a single point call object * @return a single point call object
*/ */
private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) { private GLFSingleCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) {
int offset = (int) inputBinaryCodec.readUInt(); int offset = (int) inputBinaryCodec.readUInt();
long depth = inputBinaryCodec.readUInt(); long depth = inputBinaryCodec.readUInt();
short min_lk = (short) ((depth & 0x00000000ff000000) >> 24); short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
@ -110,7 +110,7 @@ public class GLFReader implements Iterator<GLFRecord> {
for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) {
lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk); lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk);
} }
return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues); return new GLFSingleCall(referenceName, refBase, (int)(offset+currentLocation), readDepth, rmsMapping, lkValues);
} }
/** /**
@ -119,9 +119,9 @@ public class GLFReader implements Iterator<GLFRecord> {
* @param refBase the reference base * @param refBase the reference base
* @param inputBinaryCodec the input codex * @param inputBinaryCodec the input codex
* *
* @return a VariableLengthCall object * @return a GLFVariableLengthCall object
*/ */
private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) { private GLFVariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) {
int offset = (int) inputBinaryCodec.readUInt(); int offset = (int) inputBinaryCodec.readUInt();
int depth = (int) inputBinaryCodec.readUInt(); int depth = (int) inputBinaryCodec.readUInt();
short min_lk = (short) ((depth & 0x00000000ff000000) >> 24); short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
@ -143,7 +143,7 @@ public class GLFReader implements Iterator<GLFRecord> {
for (int x = 0; x < readCnt; x++) { for (int x = 0; x < readCnt; x++) {
indelSeq2[x] = inputBinaryCodec.readUByte(); indelSeq2[x] = inputBinaryCodec.readUByte();
} }
return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2); return new GLFVariableLengthCall(referenceName, refBase, offset+currentLocation, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2);
} }
public boolean hasNext() { public boolean hasNext() {
@ -172,7 +172,7 @@ public class GLFReader implements Iterator<GLFRecord> {
} else { } else {
throw new StingException("Unkonwn GLF record type (type = " + recordType + ")"); throw new StingException("Unkonwn GLF record type (type = " + recordType + ")");
} }
if (ret != null) currentLocation += ret.getOffset(); if (nextRecord != null) currentLocation = nextRecord.getPosition();
return ret; return ret;
} }
@ -225,24 +225,9 @@ public class GLFReader implements Iterator<GLFRecord> {
public void close() { public void close() {
inputBinaryCodec.close(); inputBinaryCodec.close();
} }
/**
* getter methods
*/
public String getReferenceName() {
return referenceName;
}
public int getReferenceLength() {
return referenceLength;
}
public String getHeaderStr() { public String getHeaderStr() {
return headerStr; return headerStr;
} }
public int getCurrentLocation() {
return currentLocation;
}
} }

View File

@ -40,12 +40,13 @@ import org.broadinstitute.sting.utils.StingException;
* field values. * field values.
*/ */
public abstract class GLFRecord { public abstract class GLFRecord {
public final static double LIKELIHOOD_SCALE_FACTOR = 10; public final static double LIKELIHOOD_SCALE_FACTOR = 10;
// fields common to all records // fields common to all records
protected String contig;
protected REF_BASE refBase; protected REF_BASE refBase;
protected long offset = 1; protected long position = 1;
protected short minimumLikelihood = 0; protected short minimumLikelihood = 0;
protected int readDepth = 0; protected int readDepth = 0;
protected short rmsMapQ = 0; protected short rmsMapQ = 0;
@ -72,7 +73,7 @@ public abstract class GLFRecord {
B((short) 0x0E), B((short) 0x0E),
N((short) 0x0F); N((short) 0x0F);
private final short fieldValue; // in kilograms private final short fieldValue;
/** /**
* private constructor, used by the enum class to makes each enum value * private constructor, used by the enum class to makes each enum value
@ -85,6 +86,7 @@ public abstract class GLFRecord {
/** /**
* return the character representation * return the character representation
*
* @return the char for the reference base * @return the char for the reference base
*/ */
public char toChar() { public char toChar() {
@ -139,45 +141,54 @@ public abstract class GLFRecord {
/** /**
* Constructor, given the base a character reference base * Constructor, given the base a character reference base
* *
* @param contig the contig string
* @param base the reference base in the reference * @param base the reference base in the reference
* @param offset the offset from the beginning of the reference seq * @param position the distance from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood * @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
public GLFRecord(char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) { public GLFRecord(String contig, char base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
REF_BASE newBase = REF_BASE.toBase(base); REF_BASE newBase = REF_BASE.toBase(base);
validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ); validateInput(contig, newBase, position, minimumLikelihood, readDepth, rmsMapQ);
} }
/** /**
* Constructor, given the base a REF_BASE * Constructor, given the base a REF_BASE
* *
* @param contig the contig string
* @param base the reference base in the reference * @param base the reference base in the reference
* @param offset the offset from the beginning of the reference seq * @param position the distance from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood * @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
GLFRecord(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) { GLFRecord(String contig, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ); validateInput(contig, base, position, minimumLikelihood, readDepth, rmsMapQ);
} }
/** /**
* validate the input during construction, and store valid values * validate the input during construction, and store valid values
* *
* @param chromosome the reference contig, as a String
* @param base the reference base in the reference, as a REF_BASE * @param base the reference base in the reference, as a REF_BASE
* @param offset the offset from the beginning of the reference seq * @param offset the offset from the last call
* @param position the distance from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood * @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
private void validateInput(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) { private void validateInput(String chromosome, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
// add any validation to the contig string here
this.contig = chromosome;
this.refBase = base; this.refBase = base;
if (offset > 4294967295L || offset < 0) {
throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset); if (position > 4294967295L || position < 0) {
throw new IllegalArgumentException("Position is out of bounds (0 to 0xffffffff) value passed = " + position);
} }
this.offset = offset; this.position = position;
if (minimumLikelihood > 255 || minimumLikelihood < 0) { if (minimumLikelihood > 255 || minimumLikelihood < 0) {
throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood); throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
@ -200,11 +211,16 @@ public abstract class GLFRecord {
* *
* @param out the binary codec to write to * @param out the binary codec to write to
*/ */
void write(BinaryCodec out) { void write(BinaryCodec out, GLFRecord lastRecord) {
long offset = 0;
if (lastRecord != null && lastRecord.getContig() == this.getContig())
offset = this.position - lastRecord.getPosition();
else
offset = this.position - 1; // we start at one, we need to subtract that off
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f))); short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f))); out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUInt(((Long) offset).intValue()); out.writeUInt(((Long) (offset)).intValue()); // we have to subtract one, we're an offset
long write = (long) ((long)(readDepth & 0xffffff) | (long)(this.minimumLikelihood & 0xff) << 24); long write = (long) ((long) (readDepth & 0xffffff) | (long) (this.minimumLikelihood & 0xff) << 24);
out.writeUInt(write); out.writeUInt(write);
out.writeUByte((short) rmsMapQ); out.writeUByte((short) rmsMapQ);
} }
@ -257,8 +273,8 @@ public abstract class GLFRecord {
return refBase; return refBase;
} }
public long getOffset() { public long getPosition() {
return offset; return position;
} }
public short getMinimumLikelihood() { public short getMinimumLikelihood() {
@ -272,5 +288,9 @@ public abstract class GLFRecord {
public short getRmsMapQ() { public short getRmsMapQ() {
return rmsMapQ; return rmsMapQ;
} }
public String getContig() {
return this.contig;
}
} }

View File

@ -31,11 +31,11 @@ import net.sf.samtools.util.BinaryCodec;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
* Class SinglePointCall * Class GLFSingleCall
* <p/> * <p/>
* This class represents a single point geneotype call in GLF vernacular * This class represents a single point geneotype call in GLF vernacular
*/ */
public class SinglePointCall extends GLFRecord { public class GLFSingleCall extends GLFRecord {
// our likelihoods object // our likelihoods object
private double likelihoods[]; private double likelihoods[];
@ -43,14 +43,15 @@ public class SinglePointCall extends GLFRecord {
/** /**
* create a single * create a single
* *
* @param contig the contig this record is on
* @param refBase the reference base, as a char * @param refBase the reference base, as a char
* @param offset the location, as an offset from the previous glf record * @param position the location, as an offset from the start of the contig
* @param readDepth the read depth at the specified postion * @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
* @param likelihoods the Likelihoods * @param likelihoods the Likelihoods
*/ */
public SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) { public GLFSingleCall(String contig, char refBase, int position, int readDepth, short rmsMapQ, double likelihoods[]) {
super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ); super(contig, refBase, position, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
this.likelihoods = likelihoods; this.likelihoods = likelihoods;
} }
@ -60,8 +61,8 @@ public class SinglePointCall extends GLFRecord {
* *
* @param out the codec to write to * @param out the codec to write to
*/ */
void write(BinaryCodec out) { void write(BinaryCodec out, GLFRecord lastRec) {
super.write(out); super.write(out, lastRec);
short[] adjusted = new short[likelihoods.length]; short[] adjusted = new short[likelihoods.length];
// we want to scale our values // we want to scale our values
for (int x = 0; x < likelihoods.length; x++) { for (int x = 0; x < likelihoods.length; x++) {

View File

@ -30,13 +30,13 @@ import net.sf.samtools.util.BinaryCodec;
/** /**
* @author aaron * @author aaron
* <p/> * <p/>
* Class VariableLengthCall * Class GLFVariableLengthCall
* <p/> * <p/>
* This class represents variable length genotype calls in the GLF format. * This class represents variable length genotype calls in the GLF format.
* Currently a lot of parameters need to be provided, but we may be able to thin * Currently a lot of parameters need to be provided, but we may be able to thin
* those down as we understand what we have to specify and what we can infer. * those down as we understand what we have to specify and what we can infer.
*/ */
public class VariableLengthCall extends GLFRecord { public class GLFVariableLengthCall extends GLFRecord {
// our fields, corresponding to the glf spec // our fields, corresponding to the glf spec
private short lkHom1 = 0; private short lkHom1 = 0;
private short lkHom2 = 0; private short lkHom2 = 0;
@ -53,28 +53,32 @@ public class VariableLengthCall extends GLFRecord {
/** /**
* the default constructor * the default constructor
* *
* @param refBase the reference base * @param contig the contig this record is on
* @param offset the location, as an offset from the previous glf record * @param refBase the reference base
* @param readDepth the read depth at the specified postion * @param offset the location, as an offset from the previous glf record
* @param rmsMapQ the root mean square of the mapping quality * @param offset the location, as an offset from the previous glf record
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 * @param readDepth the read depth at the specified postion
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255 * @param rmsMapQ the root mean square of the mapping quality
* @param indelSeq1 the sequence for the first indel allele * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param indelSeq2 the sequence for the second indel allele * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele
*/ */
public VariableLengthCall(char refBase, public GLFVariableLengthCall(String contig,
long offset, char refBase,
int readDepth, long offset,
short rmsMapQ, int readDepth,
double lkHom1, short rmsMapQ,
double lkHom2, double lkHom1,
double lkHet, double lkHom2,
int indelOneLength, double lkHet,
final short indelSeq1[], int indelOneLength,
int indelTwoLength, final short indelSeq1[],
final short indelSeq2[]) { int indelTwoLength,
super(refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ); final short indelSeq2[]) {
super(contig, refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
this.lkHom1 = GLFRecord.toCappedShort(lkHom1); this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
this.lkHom2 = GLFRecord.toCappedShort(lkHom2); this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
this.lkHet = GLFRecord.toCappedShort(lkHet); this.lkHet = GLFRecord.toCappedShort(lkHet);
@ -91,8 +95,8 @@ public class VariableLengthCall extends GLFRecord {
* *
* @param out the binary codec to write to * @param out the binary codec to write to
*/ */
void write(BinaryCodec out) { void write(BinaryCodec out, GLFRecord rec) {
super.write(out); super.write(out,rec);
out.writeByte(lkHom1); out.writeByte(lkHom1);
out.writeByte(lkHom2); out.writeByte(lkHom2);
out.writeByte(lkHet); out.writeByte(lkHet);

View File

@ -57,6 +57,9 @@ public class GLFWriter implements GenotypeWriter {
private String referenceSequenceName = null; private String referenceSequenceName = null;
private long referenceSequenceLength = 0; private long referenceSequenceLength = 0;
// we need to store the last record so we can calculate the offsets
private GLFRecord mLastRecord = null;
// the last position written // the last position written
private int lastPos = 1; private int lastPos = 1;
@ -122,13 +125,15 @@ public class GLFWriter implements GenotypeWriter {
// check if we've jumped to a new contig // check if we've jumped to a new contig
checkSequence(contig.getSequenceName(), contig.getSequenceLength()); checkSequence(contig.getSequenceName(), contig.getSequenceLength());
SinglePointCall call = new SinglePointCall(refBase, GLFSingleCall callGLF = new GLFSingleCall(contig.getSequenceName(),
genomicLoc - lastPos, refBase,
readDepth, genomicLoc,
(short) rmsMapQ, readDepth,
lhValues.toDoubleArray()); (short) rmsMapQ,
lhValues.toDoubleArray());
lastPos = genomicLoc; lastPos = genomicLoc;
call.write(this.outputBinaryCodec); callGLF.write(this.outputBinaryCodec,mLastRecord);
mLastRecord = callGLF;
} }
/** /**
@ -141,8 +146,8 @@ public class GLFWriter implements GenotypeWriter {
throw new IllegalStateException("The GLF Header must be written before calls can be added"); throw new IllegalStateException("The GLF Header must be written before calls can be added");
if ( !(call instanceof GLFGenotypeCall) ) if ( !(call instanceof GLFGenotypeCall) )
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); throw new IllegalArgumentException("Only GLFGenotypeCall should be passed in to the GLF writers");
GLFGenotypeCall gCall = (GLFGenotypeCall)call; GLFGenotypeCall gCall = (GLFGenotypeCall) call;
char ref = gCall.getReference(); char ref = gCall.getReference();
@ -152,10 +157,10 @@ public class GLFWriter implements GenotypeWriter {
// calculate the RMS mapping qualities and the read depth // calculate the RMS mapping qualities and the read depth
double rms = 0.0; double rms = 0.0;
if ( gCall.getPileup() != null ) if (gCall.getPileup() != null)
rms = calculateRMS(gCall.getPileup().getReads()); rms = calculateRMS(gCall.getPileup().getReads());
int readCount = gCall.getReadCount(); int readCount = gCall.getReadCount();
this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()),(int)gCall.getLocation().getStart(),(float)rms,ref,readCount,obj); this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()), (int) gCall.getLocation().getStart(), (float) rms, ref, readCount, obj);
} }
@ -173,6 +178,7 @@ public class GLFWriter implements GenotypeWriter {
} }
return MathUtils.rms(qualities); return MathUtils.rms(qualities);
} }
/** /**
* add a variable length (indel, deletion, etc) to the genotype writer * add a variable length (indel, deletion, etc) to the genotype writer
* *
@ -201,7 +207,8 @@ public class GLFWriter implements GenotypeWriter {
checkSequence(contig.getSequenceName(), contig.getSequenceLength()); checkSequence(contig.getSequenceName(), contig.getSequenceLength());
// normalize the two // normalize the two
VariableLengthCall call = new VariableLengthCall(refBase, GLFVariableLengthCall call = new GLFVariableLengthCall(contig.getSequenceName(),
refBase,
genomicLoc - lastPos, genomicLoc - lastPos,
readDepth, readDepth,
(short) rmsMapQ, (short) rmsMapQ,
@ -213,7 +220,8 @@ public class GLFWriter implements GenotypeWriter {
secondHomZyg.getLengthOfIndel(), secondHomZyg.getLengthOfIndel(),
secondHomZyg.getIndelSequence()); secondHomZyg.getIndelSequence());
lastPos = genomicLoc; lastPos = genomicLoc;
call.write(this.outputBinaryCodec); call.write(this.outputBinaryCodec,mLastRecord);
mLastRecord = call;
} }
/** /**
@ -238,7 +246,8 @@ public class GLFWriter implements GenotypeWriter {
throw new IllegalStateException("The GLF Header must be written before records can be added"); throw new IllegalStateException("The GLF Header must be written before records can be added");
checkSequence(contigName, contigLength); checkSequence(contigName, contigLength);
rec.write(this.outputBinaryCodec); rec.write(this.outputBinaryCodec,mLastRecord);
mLastRecord = rec;
} }
/** /**
@ -287,16 +296,6 @@ public class GLFWriter implements GenotypeWriter {
outputBinaryCodec.close(); outputBinaryCodec.close();
} }
/**
* get the reference sequence
*
* @return the reference sequence
*/
public String getReferenceSequenceName() {
return referenceSequenceName;
}
/** /**
* add a multi-sample call if we support it * add a multi-sample call if we support it
* *

View File

@ -87,14 +87,14 @@ public class RodGLFTest extends BaseTest {
@Test @Test
public void testCompareTo() { public void testCompareTo() {
RodGLF iter2 = RodGLF.createIterator("test", glfFile); RodGLF iter2 = RodGLF.createIterator("test", glfFile);
RodGLF glf = iter.next(); RodGLF glf = iter.next();
glf = iter2.next(); RodGLF glf2 = iter2.next();
assertEquals(0, iter.compareTo(iter2)); assertEquals(0, glf.compareTo(glf2));
RodGLF glf2 = iter.next(); glf2 = iter2.next();
assertEquals(-1, iter2.compareTo(iter)); assertEquals(-1, glf.compareTo(glf2));
assertEquals(1, iter.compareTo(iter2)); assertEquals(1, glf2.compareTo(glf));
} }

View File

@ -23,7 +23,6 @@ public class GLFReaderTest extends BaseTest {
static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf"); static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
//static final File glfFile = new File("CALLS.glf"); //static final File glfFile = new File("CALLS.glf");
static final int finalRecordCount = 484140; // the number of records in the above file static final int finalRecordCount = 484140; // the number of records in the above file
//static final int finalRecordCount = 484445;
static final int contigCount = 25; static final int contigCount = 25;
/** read in the records from the file */ /** read in the records from the file */
@ -36,10 +35,10 @@ public class GLFReaderTest extends BaseTest {
long location = 1; long location = 1;
while (reader.hasNext()) { while (reader.hasNext()) {
GLFRecord rec = reader.next(); GLFRecord rec = reader.next();
if (!contigs.contains(reader.getReferenceName())) { if (!contigs.contains(rec.getContig())) {
contigs.add(reader.getReferenceName()); contigs.add(rec.getContig());
} }
location = location + rec.offset; location = rec.getPosition();
//System.err.println("Record count = " + finalRecordCount + " offset " + rec.offset + " location = " + location + " type = " + rec.getRecordType()); //System.err.println("Record count = " + finalRecordCount + " offset " + rec.offset + " location = " + location + " type = " + rec.getRecordType());
++recCount; ++recCount;
} }

View File

@ -0,0 +1,35 @@
package org.broadinstitute.sting.utils.genotype.glf;
import org.broadinstitute.sting.BaseTest;
import org.junit.Assert;
import org.junit.Test;
/**
*
* @author aaron
*
* Class GLFRecordTest
*
* Test out the basics of a GLFRecord
*/
public class GLFRecordTest extends BaseTest {
@Test
public void testConstructingGLFRecord() {
double likelihoods[] = new double[10];
for (int i = 0; i < 10; i++) {
likelihoods[i] = 10.0;
}
GLFRecord rec = new GLFSingleCall("1",'A',1,100,(short)200,likelihoods);
Assert.assertTrue("1".equals(rec.contig));
Assert.assertEquals('A',rec.getRefBase().toChar());
Assert.assertEquals(1,rec.getPosition());
Assert.assertEquals(10,rec.getMinimumLikelihood());
Assert.assertEquals(200,rec.getRmsMapQ());
Assert.assertEquals(100,rec.getReadDepth());
}
}

View File

@ -54,8 +54,6 @@ public class GLFWriterTest extends BaseTest {
/** some made up values that we use to generate the GLF */ /** some made up values that we use to generate the GLF */
private final String header = ""; private final String header = "";
private final String referenceSequenceName = "chr1";
private final int refLength = 1000;
private static final int GENOTYPE_COUNT = 10; private static final int GENOTYPE_COUNT = 10;
private GenotypeWriter rec; private GenotypeWriter rec;
protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"}; protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"};
@ -78,7 +76,13 @@ public class GLFWriterTest extends BaseTest {
} }
/**
* create a fake genotype
* @param bestGenotype the best genotype, as an index into the array of values
* @param location the location we're creating the genotype at
* @param ref the reference base
* @return a FakeGenotype (a fake genotype)
*/
private FakeGenotype createGenotype(int bestGenotype, GenomeLoc location, char ref) { private FakeGenotype createGenotype(int bestGenotype, GenomeLoc location, char ref) {
double lk[] = new double[GENOTYPE_COUNT]; double lk[] = new double[GENOTYPE_COUNT];
for (int x = 0; x < GENOTYPE_COUNT; x++) { for (int x = 0; x < GENOTYPE_COUNT; x++) {
@ -89,6 +93,10 @@ public class GLFWriterTest extends BaseTest {
return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk); return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk);
} }
/**
* can we actually write a file?
*/
@Test @Test
public void basicWrite() { public void basicWrite() {
File writeTo = new File("testGLF.glf"); File writeTo = new File("testGLF.glf");
@ -106,6 +114,11 @@ public class GLFWriterTest extends BaseTest {
} }
/**
* write a bunch of fake records a GLF file, and then read it back from the
* same file. We want to make sure a round trip is successful; that we write
* and then read the same information back.
*/
@Test @Test
public void basicWriteThenRead() { public void basicWriteThenRead() {
File writeTo = new File("testGLF2.glf"); File writeTo = new File("testGLF2.glf");
@ -124,7 +137,7 @@ public class GLFWriterTest extends BaseTest {
int count = 0; int count = 0;
while (reader.hasNext()) { while (reader.hasNext()) {
GLFRecord rec = reader.next(); GLFRecord rec = reader.next();
Assert.assertTrue(types.get(count).compareTo(FakeGenotype.toFakeGenotype((SinglePointCall) rec, reader.getReferenceName(), reader.getCurrentLocation())) == 0); Assert.assertTrue(types.get(count).compareTo(FakeGenotype.toFakeGenotype((GLFSingleCall) rec, rec.getContig(), (int)rec.getPosition())) == 0);
count++; count++;
} }
} }
@ -185,7 +198,7 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
return 0; return 0;
} }
public static FakeGenotype toFakeGenotype(SinglePointCall record, String contig, int postition) { public static FakeGenotype toFakeGenotype(GLFSingleCall record, String contig, int postition) {
double likelihoods[] = record.getLikelihoods(); double likelihoods[] = record.getLikelihoods();
char ref = record.getRefBase().toChar(); char ref = record.getRefBase().toChar();
double significance = GLFWriterTest.SIGNIFICANCE; double significance = GLFWriterTest.SIGNIFICANCE;