Changed the GLF record to store its contig name and position in each record instead of in the Reader. Integration tests all stay the same.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2410 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
80b3eb85fa
commit
7e0f69dab5
|
|
@ -93,10 +93,8 @@ public class GenotypeWriterStorage implements GenotypeWriter, Storage<GenotypeWr
|
|||
else if ( targetStream instanceof GLFWriter ) {
|
||||
GLFReader reader = new GLFReader(file);
|
||||
while ( reader.hasNext() ) {
|
||||
// TODO -- Find out from Aaron if this is correct. Looking through the code,
|
||||
// TODO -- it looks like this will exhibit the correct behavior - but it feels
|
||||
// TODO -- wrong that we get the contig/length of the record before we call next()
|
||||
((GLFWriter)targetStream).addGLFRecord(reader.getReferenceName(), reader.getReferenceLength(), reader.next());
|
||||
GLFRecord rec = reader.next();
|
||||
((GLFWriter)targetStream).addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.SinglePointCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.VariableLengthCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFVariableLengthCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -27,7 +27,6 @@ import java.util.NoSuchElementException;
|
|||
* the rod class for GLF data.
|
||||
*/
|
||||
public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
||||
static int count = 0;
|
||||
public GLFReader mReader;
|
||||
private final String mName;
|
||||
private GenomeLoc mLoc;
|
||||
|
|
@ -75,16 +74,16 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
mRecord.getReadDepth(),
|
||||
mRecord.getRmsMapQ(),
|
||||
getBestGenotypeValue(1),
|
||||
((SinglePointCall) mRecord).getLikelihoods()[0],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[1],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[2],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[3],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[4],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[5],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[6],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[7],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[8],
|
||||
((SinglePointCall) mRecord).getLikelihoods()[9]
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[0],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[1],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[2],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[3],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[4],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[5],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[6],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[7],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[8],
|
||||
((GLFSingleCall) mRecord).getLikelihoods()[9]
|
||||
|
||||
|
||||
);
|
||||
|
|
@ -209,7 +208,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
public LikelihoodObject.GENOTYPE getBestGenotype(int nthBest) {
|
||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||
Integer[] sorted = Utils.SortPermutation(((GLFSingleCall) mRecord).getLikelihoods());
|
||||
return LikelihoodObject.GENOTYPE.values()[sorted[nthBest - 1]];
|
||||
}
|
||||
|
||||
|
|
@ -222,8 +221,8 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
public double getBestGenotypeValue(int nthBest) {
|
||||
Integer[] sorted = Utils.SortPermutation(((SinglePointCall) mRecord).getLikelihoods());
|
||||
return (((SinglePointCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
|
||||
Integer[] sorted = Utils.SortPermutation(((GLFSingleCall) mRecord).getLikelihoods());
|
||||
return (((GLFSingleCall) mRecord).getLikelihoods())[sorted[nthBest - 1]];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -235,7 +234,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
@Override
|
||||
public boolean isInsertion() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
||||
((VariableLengthCall) mRecord).getIndelLen1() > 0);
|
||||
((GLFVariableLengthCall) mRecord).getIndelLen1() > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -247,7 +246,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
@Override
|
||||
public boolean isDeletion() {
|
||||
return ((mRecord.getRecordType() == GLFRecord.RECORD_TYPE.VARIABLE) &&
|
||||
((VariableLengthCall) mRecord).getIndelLen1() < 0);
|
||||
((GLFVariableLengthCall) mRecord).getIndelLen1() < 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -283,7 +282,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
if (g.toString().equals(ref)) break;
|
||||
index++;
|
||||
}
|
||||
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
return Math.abs(getBestGenotypeValue(1) - ((GLFSingleCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -358,7 +357,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
public RodGLF next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("RodGLF next called on iterator with no more elements");
|
||||
mRecord = mReader.next();
|
||||
mLoc = GenomeLocParser.createGenomeLoc(mReader.getReferenceName(), mReader.getCurrentLocation(), mReader.getCurrentLocation());
|
||||
mLoc = GenomeLocParser.createGenomeLoc(mRecord.getContig(), mRecord.getPosition(), mRecord.getPosition());
|
||||
return this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
private int referenceLength;
|
||||
|
||||
// the current location, keeping track of the offsets
|
||||
private int currentLocation = 1;
|
||||
private long currentLocation = 1;
|
||||
|
||||
// we have this variable becuase there is no eof for glf's
|
||||
private int lastRecordType = -1;
|
||||
|
|
@ -100,7 +100,7 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
*
|
||||
* @return a single point call object
|
||||
*/
|
||||
private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) {
|
||||
private GLFSingleCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) {
|
||||
int offset = (int) inputBinaryCodec.readUInt();
|
||||
long depth = inputBinaryCodec.readUInt();
|
||||
short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
|
||||
|
|
@ -110,7 +110,7 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) {
|
||||
lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk);
|
||||
}
|
||||
return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues);
|
||||
return new GLFSingleCall(referenceName, refBase, (int)(offset+currentLocation), readDepth, rmsMapping, lkValues);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -119,9 +119,9 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
* @param refBase the reference base
|
||||
* @param inputBinaryCodec the input codex
|
||||
*
|
||||
* @return a VariableLengthCall object
|
||||
* @return a GLFVariableLengthCall object
|
||||
*/
|
||||
private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) {
|
||||
private GLFVariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) {
|
||||
int offset = (int) inputBinaryCodec.readUInt();
|
||||
int depth = (int) inputBinaryCodec.readUInt();
|
||||
short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
|
||||
|
|
@ -143,7 +143,7 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
for (int x = 0; x < readCnt; x++) {
|
||||
indelSeq2[x] = inputBinaryCodec.readUByte();
|
||||
}
|
||||
return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2);
|
||||
return new GLFVariableLengthCall(referenceName, refBase, offset+currentLocation, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
@ -172,7 +172,7 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
} else {
|
||||
throw new StingException("Unkonwn GLF record type (type = " + recordType + ")");
|
||||
}
|
||||
if (ret != null) currentLocation += ret.getOffset();
|
||||
if (nextRecord != null) currentLocation = nextRecord.getPosition();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -225,24 +225,9 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
public void close() {
|
||||
inputBinaryCodec.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* getter methods
|
||||
*/
|
||||
|
||||
public String getReferenceName() {
|
||||
return referenceName;
|
||||
}
|
||||
|
||||
public int getReferenceLength() {
|
||||
return referenceLength;
|
||||
}
|
||||
|
||||
|
||||
public String getHeaderStr() {
|
||||
return headerStr;
|
||||
}
|
||||
|
||||
public int getCurrentLocation() {
|
||||
return currentLocation;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,12 +40,13 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
* field values.
|
||||
*/
|
||||
public abstract class GLFRecord {
|
||||
public final static double LIKELIHOOD_SCALE_FACTOR = 10;
|
||||
public final static double LIKELIHOOD_SCALE_FACTOR = 10;
|
||||
|
||||
|
||||
// fields common to all records
|
||||
protected String contig;
|
||||
protected REF_BASE refBase;
|
||||
protected long offset = 1;
|
||||
protected long position = 1;
|
||||
protected short minimumLikelihood = 0;
|
||||
protected int readDepth = 0;
|
||||
protected short rmsMapQ = 0;
|
||||
|
|
@ -72,7 +73,7 @@ public abstract class GLFRecord {
|
|||
B((short) 0x0E),
|
||||
N((short) 0x0F);
|
||||
|
||||
private final short fieldValue; // in kilograms
|
||||
private final short fieldValue;
|
||||
|
||||
/**
|
||||
* private constructor, used by the enum class to makes each enum value
|
||||
|
|
@ -85,6 +86,7 @@ public abstract class GLFRecord {
|
|||
|
||||
/**
|
||||
* return the character representation
|
||||
*
|
||||
* @return the char for the reference base
|
||||
*/
|
||||
public char toChar() {
|
||||
|
|
@ -139,45 +141,54 @@ public abstract class GLFRecord {
|
|||
/**
|
||||
* Constructor, given the base a character reference base
|
||||
*
|
||||
* @param contig the contig string
|
||||
* @param base the reference base in the reference
|
||||
* @param offset the offset from the beginning of the reference seq
|
||||
* @param position the distance from the beginning of the reference seq
|
||||
* @param minimumLikelihood it's minimum likelihood
|
||||
* @param readDepth the read depth at this position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
*/
|
||||
public GLFRecord(char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
public GLFRecord(String contig, char base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
REF_BASE newBase = REF_BASE.toBase(base);
|
||||
validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ);
|
||||
validateInput(contig, newBase, position, minimumLikelihood, readDepth, rmsMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor, given the base a REF_BASE
|
||||
*
|
||||
* @param contig the contig string
|
||||
* @param base the reference base in the reference
|
||||
* @param offset the offset from the beginning of the reference seq
|
||||
* @param position the distance from the beginning of the reference seq
|
||||
* @param minimumLikelihood it's minimum likelihood
|
||||
* @param readDepth the read depth at this position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
*/
|
||||
GLFRecord(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ);
|
||||
GLFRecord(String contig, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
validateInput(contig, base, position, minimumLikelihood, readDepth, rmsMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
* validate the input during construction, and store valid values
|
||||
*
|
||||
* @param chromosome the reference contig, as a String
|
||||
* @param base the reference base in the reference, as a REF_BASE
|
||||
* @param offset the offset from the beginning of the reference seq
|
||||
* @param offset the offset from the last call
|
||||
* @param position the distance from the beginning of the reference seq
|
||||
* @param minimumLikelihood it's minimum likelihood
|
||||
* @param readDepth the read depth at this position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
*/
|
||||
private void validateInput(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
private void validateInput(String chromosome, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
// add any validation to the contig string here
|
||||
this.contig = chromosome;
|
||||
|
||||
this.refBase = base;
|
||||
if (offset > 4294967295L || offset < 0) {
|
||||
throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset);
|
||||
|
||||
if (position > 4294967295L || position < 0) {
|
||||
throw new IllegalArgumentException("Position is out of bounds (0 to 0xffffffff) value passed = " + position);
|
||||
}
|
||||
this.offset = offset;
|
||||
this.position = position;
|
||||
|
||||
|
||||
if (minimumLikelihood > 255 || minimumLikelihood < 0) {
|
||||
throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
|
||||
|
|
@ -200,11 +211,16 @@ public abstract class GLFRecord {
|
|||
*
|
||||
* @param out the binary codec to write to
|
||||
*/
|
||||
void write(BinaryCodec out) {
|
||||
void write(BinaryCodec out, GLFRecord lastRecord) {
|
||||
long offset = 0;
|
||||
if (lastRecord != null && lastRecord.getContig() == this.getContig())
|
||||
offset = this.position - lastRecord.getPosition();
|
||||
else
|
||||
offset = this.position - 1; // we start at one, we need to subtract that off
|
||||
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUInt(((Long) offset).intValue());
|
||||
long write = (long) ((long)(readDepth & 0xffffff) | (long)(this.minimumLikelihood & 0xff) << 24);
|
||||
out.writeUInt(((Long) (offset)).intValue()); // we have to subtract one, we're an offset
|
||||
long write = (long) ((long) (readDepth & 0xffffff) | (long) (this.minimumLikelihood & 0xff) << 24);
|
||||
out.writeUInt(write);
|
||||
out.writeUByte((short) rmsMapQ);
|
||||
}
|
||||
|
|
@ -257,8 +273,8 @@ public abstract class GLFRecord {
|
|||
return refBase;
|
||||
}
|
||||
|
||||
public long getOffset() {
|
||||
return offset;
|
||||
public long getPosition() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public short getMinimumLikelihood() {
|
||||
|
|
@ -272,5 +288,9 @@ public abstract class GLFRecord {
|
|||
public short getRmsMapQ() {
|
||||
return rmsMapQ;
|
||||
}
|
||||
|
||||
public String getContig() {
|
||||
return this.contig;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,11 +31,11 @@ import net.sf.samtools.util.BinaryCodec;
|
|||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class SinglePointCall
|
||||
* Class GLFSingleCall
|
||||
* <p/>
|
||||
* This class represents a single point geneotype call in GLF vernacular
|
||||
*/
|
||||
public class SinglePointCall extends GLFRecord {
|
||||
public class GLFSingleCall extends GLFRecord {
|
||||
|
||||
// our likelihoods object
|
||||
private double likelihoods[];
|
||||
|
|
@ -43,14 +43,15 @@ public class SinglePointCall extends GLFRecord {
|
|||
/**
|
||||
* create a single
|
||||
*
|
||||
* @param contig the contig this record is on
|
||||
* @param refBase the reference base, as a char
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
* @param position the location, as an offset from the start of the contig
|
||||
* @param readDepth the read depth at the specified postion
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param likelihoods the Likelihoods
|
||||
*/
|
||||
public SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) {
|
||||
super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
|
||||
public GLFSingleCall(String contig, char refBase, int position, int readDepth, short rmsMapQ, double likelihoods[]) {
|
||||
super(contig, refBase, position, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
|
||||
this.likelihoods = likelihoods;
|
||||
}
|
||||
|
||||
|
|
@ -60,8 +61,8 @@ public class SinglePointCall extends GLFRecord {
|
|||
*
|
||||
* @param out the codec to write to
|
||||
*/
|
||||
void write(BinaryCodec out) {
|
||||
super.write(out);
|
||||
void write(BinaryCodec out, GLFRecord lastRec) {
|
||||
super.write(out, lastRec);
|
||||
short[] adjusted = new short[likelihoods.length];
|
||||
// we want to scale our values
|
||||
for (int x = 0; x < likelihoods.length; x++) {
|
||||
|
|
@ -30,13 +30,13 @@ import net.sf.samtools.util.BinaryCodec;
|
|||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VariableLengthCall
|
||||
* Class GLFVariableLengthCall
|
||||
* <p/>
|
||||
* This class represents variable length genotype calls in the GLF format.
|
||||
* Currently a lot of parameters need to be provided, but we may be able to thin
|
||||
* those down as we understand what we have to specify and what we can infer.
|
||||
*/
|
||||
public class VariableLengthCall extends GLFRecord {
|
||||
public class GLFVariableLengthCall extends GLFRecord {
|
||||
// our fields, corresponding to the glf spec
|
||||
private short lkHom1 = 0;
|
||||
private short lkHom2 = 0;
|
||||
|
|
@ -53,28 +53,32 @@ public class VariableLengthCall extends GLFRecord {
|
|||
/**
|
||||
* the default constructor
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
* @param readDepth the read depth at the specified postion
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
|
||||
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
|
||||
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
|
||||
* @param indelSeq1 the sequence for the first indel allele
|
||||
* @param indelSeq2 the sequence for the second indel allele
|
||||
* @param contig the contig this record is on
|
||||
* @param refBase the reference base
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
|
||||
* @param readDepth the read depth at the specified postion
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
|
||||
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
|
||||
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
|
||||
* @param indelSeq1 the sequence for the first indel allele
|
||||
* @param indelSeq2 the sequence for the second indel allele
|
||||
*/
|
||||
public VariableLengthCall(char refBase,
|
||||
long offset,
|
||||
int readDepth,
|
||||
short rmsMapQ,
|
||||
double lkHom1,
|
||||
double lkHom2,
|
||||
double lkHet,
|
||||
int indelOneLength,
|
||||
final short indelSeq1[],
|
||||
int indelTwoLength,
|
||||
final short indelSeq2[]) {
|
||||
super(refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
|
||||
public GLFVariableLengthCall(String contig,
|
||||
char refBase,
|
||||
long offset,
|
||||
int readDepth,
|
||||
short rmsMapQ,
|
||||
double lkHom1,
|
||||
double lkHom2,
|
||||
double lkHet,
|
||||
int indelOneLength,
|
||||
final short indelSeq1[],
|
||||
int indelTwoLength,
|
||||
final short indelSeq2[]) {
|
||||
super(contig, refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
|
||||
this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
|
||||
this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
|
||||
this.lkHet = GLFRecord.toCappedShort(lkHet);
|
||||
|
|
@ -91,8 +95,8 @@ public class VariableLengthCall extends GLFRecord {
|
|||
*
|
||||
* @param out the binary codec to write to
|
||||
*/
|
||||
void write(BinaryCodec out) {
|
||||
super.write(out);
|
||||
void write(BinaryCodec out, GLFRecord rec) {
|
||||
super.write(out,rec);
|
||||
out.writeByte(lkHom1);
|
||||
out.writeByte(lkHom2);
|
||||
out.writeByte(lkHet);
|
||||
|
|
@ -57,6 +57,9 @@ public class GLFWriter implements GenotypeWriter {
|
|||
private String referenceSequenceName = null;
|
||||
private long referenceSequenceLength = 0;
|
||||
|
||||
// we need to store the last record so we can calculate the offsets
|
||||
private GLFRecord mLastRecord = null;
|
||||
|
||||
// the last position written
|
||||
private int lastPos = 1;
|
||||
|
||||
|
|
@ -122,13 +125,15 @@ public class GLFWriter implements GenotypeWriter {
|
|||
// check if we've jumped to a new contig
|
||||
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||
|
||||
SinglePointCall call = new SinglePointCall(refBase,
|
||||
genomicLoc - lastPos,
|
||||
readDepth,
|
||||
(short) rmsMapQ,
|
||||
lhValues.toDoubleArray());
|
||||
GLFSingleCall callGLF = new GLFSingleCall(contig.getSequenceName(),
|
||||
refBase,
|
||||
genomicLoc,
|
||||
readDepth,
|
||||
(short) rmsMapQ,
|
||||
lhValues.toDoubleArray());
|
||||
lastPos = genomicLoc;
|
||||
call.write(this.outputBinaryCodec);
|
||||
callGLF.write(this.outputBinaryCodec,mLastRecord);
|
||||
mLastRecord = callGLF;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -141,8 +146,8 @@ public class GLFWriter implements GenotypeWriter {
|
|||
throw new IllegalStateException("The GLF Header must be written before calls can be added");
|
||||
|
||||
if ( !(call instanceof GLFGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GLFGenotypeCall gCall = (GLFGenotypeCall)call;
|
||||
throw new IllegalArgumentException("Only GLFGenotypeCall should be passed in to the GLF writers");
|
||||
GLFGenotypeCall gCall = (GLFGenotypeCall) call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
|
||||
|
|
@ -152,10 +157,10 @@ public class GLFWriter implements GenotypeWriter {
|
|||
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
double rms = 0.0;
|
||||
if ( gCall.getPileup() != null )
|
||||
if (gCall.getPileup() != null)
|
||||
rms = calculateRMS(gCall.getPileup().getReads());
|
||||
int readCount = gCall.getReadCount();
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()),(int)gCall.getLocation().getStart(),(float)rms,ref,readCount,obj);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()), (int) gCall.getLocation().getStart(), (float) rms, ref, readCount, obj);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -173,6 +178,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
}
|
||||
return MathUtils.rms(qualities);
|
||||
}
|
||||
|
||||
/**
|
||||
* add a variable length (indel, deletion, etc) to the genotype writer
|
||||
*
|
||||
|
|
@ -201,7 +207,8 @@ public class GLFWriter implements GenotypeWriter {
|
|||
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||
|
||||
// normalize the two
|
||||
VariableLengthCall call = new VariableLengthCall(refBase,
|
||||
GLFVariableLengthCall call = new GLFVariableLengthCall(contig.getSequenceName(),
|
||||
refBase,
|
||||
genomicLoc - lastPos,
|
||||
readDepth,
|
||||
(short) rmsMapQ,
|
||||
|
|
@ -213,7 +220,8 @@ public class GLFWriter implements GenotypeWriter {
|
|||
secondHomZyg.getLengthOfIndel(),
|
||||
secondHomZyg.getIndelSequence());
|
||||
lastPos = genomicLoc;
|
||||
call.write(this.outputBinaryCodec);
|
||||
call.write(this.outputBinaryCodec,mLastRecord);
|
||||
mLastRecord = call;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -238,7 +246,8 @@ public class GLFWriter implements GenotypeWriter {
|
|||
throw new IllegalStateException("The GLF Header must be written before records can be added");
|
||||
|
||||
checkSequence(contigName, contigLength);
|
||||
rec.write(this.outputBinaryCodec);
|
||||
rec.write(this.outputBinaryCodec,mLastRecord);
|
||||
mLastRecord = rec;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -287,16 +296,6 @@ public class GLFWriter implements GenotypeWriter {
|
|||
outputBinaryCodec.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference sequence
|
||||
*
|
||||
* @return the reference sequence
|
||||
*/
|
||||
public String getReferenceSequenceName() {
|
||||
return referenceSequenceName;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* add a multi-sample call if we support it
|
||||
*
|
||||
|
|
|
|||
|
|
@ -87,14 +87,14 @@ public class RodGLFTest extends BaseTest {
|
|||
|
||||
|
||||
@Test
|
||||
public void testCompareTo() {
|
||||
public void testCompareTo() {
|
||||
RodGLF iter2 = RodGLF.createIterator("test", glfFile);
|
||||
RodGLF glf = iter.next();
|
||||
glf = iter2.next();
|
||||
assertEquals(0, iter.compareTo(iter2));
|
||||
RodGLF glf2 = iter.next();
|
||||
assertEquals(-1, iter2.compareTo(iter));
|
||||
assertEquals(1, iter.compareTo(iter2));
|
||||
RodGLF glf2 = iter2.next();
|
||||
assertEquals(0, glf.compareTo(glf2));
|
||||
glf2 = iter2.next();
|
||||
assertEquals(-1, glf.compareTo(glf2));
|
||||
assertEquals(1, glf2.compareTo(glf));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ public class GLFReaderTest extends BaseTest {
|
|||
static final File glfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/index_test_likelihoods.glf");
|
||||
//static final File glfFile = new File("CALLS.glf");
|
||||
static final int finalRecordCount = 484140; // the number of records in the above file
|
||||
//static final int finalRecordCount = 484445;
|
||||
static final int contigCount = 25;
|
||||
|
||||
/** read in the records from the file */
|
||||
|
|
@ -36,10 +35,10 @@ public class GLFReaderTest extends BaseTest {
|
|||
long location = 1;
|
||||
while (reader.hasNext()) {
|
||||
GLFRecord rec = reader.next();
|
||||
if (!contigs.contains(reader.getReferenceName())) {
|
||||
contigs.add(reader.getReferenceName());
|
||||
if (!contigs.contains(rec.getContig())) {
|
||||
contigs.add(rec.getContig());
|
||||
}
|
||||
location = location + rec.offset;
|
||||
location = rec.getPosition();
|
||||
//System.err.println("Record count = " + finalRecordCount + " offset " + rec.offset + " location = " + location + " type = " + rec.getRecordType());
|
||||
++recCount;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GLFRecordTest
|
||||
*
|
||||
* Test out the basics of a GLFRecord
|
||||
*/
|
||||
public class GLFRecordTest extends BaseTest {
|
||||
|
||||
@Test
|
||||
public void testConstructingGLFRecord() {
|
||||
double likelihoods[] = new double[10];
|
||||
for (int i = 0; i < 10; i++) {
|
||||
likelihoods[i] = 10.0;
|
||||
}
|
||||
GLFRecord rec = new GLFSingleCall("1",'A',1,100,(short)200,likelihoods);
|
||||
|
||||
Assert.assertTrue("1".equals(rec.contig));
|
||||
Assert.assertEquals('A',rec.getRefBase().toChar());
|
||||
Assert.assertEquals(1,rec.getPosition());
|
||||
Assert.assertEquals(10,rec.getMinimumLikelihood());
|
||||
Assert.assertEquals(200,rec.getRmsMapQ());
|
||||
Assert.assertEquals(100,rec.getReadDepth());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -54,8 +54,6 @@ public class GLFWriterTest extends BaseTest {
|
|||
|
||||
/** some made up values that we use to generate the GLF */
|
||||
private final String header = "";
|
||||
private final String referenceSequenceName = "chr1";
|
||||
private final int refLength = 1000;
|
||||
private static final int GENOTYPE_COUNT = 10;
|
||||
private GenotypeWriter rec;
|
||||
protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"};
|
||||
|
|
@ -78,7 +76,13 @@ public class GLFWriterTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a fake genotype
|
||||
* @param bestGenotype the best genotype, as an index into the array of values
|
||||
* @param location the location we're creating the genotype at
|
||||
* @param ref the reference base
|
||||
* @return a FakeGenotype (a fake genotype)
|
||||
*/
|
||||
private FakeGenotype createGenotype(int bestGenotype, GenomeLoc location, char ref) {
|
||||
double lk[] = new double[GENOTYPE_COUNT];
|
||||
for (int x = 0; x < GENOTYPE_COUNT; x++) {
|
||||
|
|
@ -89,6 +93,10 @@ public class GLFWriterTest extends BaseTest {
|
|||
return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* can we actually write a file?
|
||||
*/
|
||||
@Test
|
||||
public void basicWrite() {
|
||||
File writeTo = new File("testGLF.glf");
|
||||
|
|
@ -106,6 +114,11 @@ public class GLFWriterTest extends BaseTest {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* write a bunch of fake records a GLF file, and then read it back from the
|
||||
* same file. We want to make sure a round trip is successful; that we write
|
||||
* and then read the same information back.
|
||||
*/
|
||||
@Test
|
||||
public void basicWriteThenRead() {
|
||||
File writeTo = new File("testGLF2.glf");
|
||||
|
|
@ -124,7 +137,7 @@ public class GLFWriterTest extends BaseTest {
|
|||
int count = 0;
|
||||
while (reader.hasNext()) {
|
||||
GLFRecord rec = reader.next();
|
||||
Assert.assertTrue(types.get(count).compareTo(FakeGenotype.toFakeGenotype((SinglePointCall) rec, reader.getReferenceName(), reader.getCurrentLocation())) == 0);
|
||||
Assert.assertTrue(types.get(count).compareTo(FakeGenotype.toFakeGenotype((GLFSingleCall) rec, rec.getContig(), (int)rec.getPosition())) == 0);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
|
@ -185,7 +198,7 @@ class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
|
|||
return 0;
|
||||
}
|
||||
|
||||
public static FakeGenotype toFakeGenotype(SinglePointCall record, String contig, int postition) {
|
||||
public static FakeGenotype toFakeGenotype(GLFSingleCall record, String contig, int postition) {
|
||||
double likelihoods[] = record.getLikelihoods();
|
||||
char ref = record.getRefBase().toChar();
|
||||
double significance = GLFWriterTest.SIGNIFICANCE;
|
||||
|
|
|
|||
Loading…
Reference in New Issue