adding code to deal with the off-spec situation where our minimum likelihood is above the GLF max of 255.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2871 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-02-22 22:27:39 +00:00
parent 88d0677379
commit 5546aa4416
5 changed files with 94 additions and 25 deletions

View File

@ -47,7 +47,6 @@ public abstract class GLFRecord {
protected String contig;
protected REF_BASE refBase;
protected long position = 1;
protected short minimumLikelihood = 0;
protected int readDepth = 0;
protected short rmsMapQ = 0;
@ -126,7 +125,7 @@ public abstract class GLFRecord {
SINGLE((short) 1),
VARIABLE((short) 2);
private final short fieldValue; // in kilograms
private final short fieldValue; // a place to store the type
RECORD_TYPE(short value) {
fieldValue = value;
@ -144,13 +143,12 @@ public abstract class GLFRecord {
* @param contig the contig string
* @param base the reference base in the reference
* @param position the distance from the beginning of the reference seq
* @param minimumLikelihood its minimum likelihood
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
public GLFRecord(String contig, char base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
public GLFRecord(String contig, char base, long position, int readDepth, short rmsMapQ) {
REF_BASE newBase = REF_BASE.toBase(base);
validateInput(contig, newBase, position, minimumLikelihood, readDepth, rmsMapQ);
validateInput(contig, newBase, position, readDepth, rmsMapQ);
}
/**
@ -159,12 +157,11 @@ public abstract class GLFRecord {
* @param contig the contig string
* @param base the reference base in the reference
* @param position the distance from the beginning of the reference seq
* @param minimumLikelihood its minimum likelihood
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
GLFRecord(String contig, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
validateInput(contig, base, position, minimumLikelihood, readDepth, rmsMapQ);
GLFRecord(String contig, REF_BASE base, long position, int readDepth, short rmsMapQ) {
validateInput(contig, base, position, readDepth, rmsMapQ);
}
/**
@ -173,11 +170,10 @@ public abstract class GLFRecord {
* @param chromosome the reference contig, as a String
* @param base the reference base in the reference, as a REF_BASE
* @param position the distance from the beginning of the reference seq
* @param minimumLikelihood its minimum likelihood
* @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality
*/
private void validateInput(String chromosome, REF_BASE base, long position, double minimumLikelihood, int readDepth, short rmsMapQ) {
private void validateInput(String chromosome, REF_BASE base, long position, int readDepth, short rmsMapQ) {
// add any validation to the contig string here
this.contig = chromosome;
@ -188,10 +184,10 @@ public abstract class GLFRecord {
}
this.position = position;
if (minimumLikelihood > 255 || minimumLikelihood < 0) {
throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
}
this.minimumLikelihood = GLFRecord.toCappedShort(minimumLikelihood);
// if (minimumLikelihood > 255 || minimumLikelihood < 0) {
// throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
// }
// this.minimumLikelihood = GLFRecord.toCappedShort(minimumLikelihood);
if (readDepth > 16777215 || readDepth < 0) {
throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth);
@ -218,7 +214,7 @@ public abstract class GLFRecord {
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUInt(((Long) (offset)).intValue()); // we have to subtract one, we're an offset
long write = (long) ((long) (readDepth & 0xffffff) | (long) (this.minimumLikelihood & 0xff) << 24);
long write = (long) ((long) (readDepth & 0xffffff) | (long) (this.getMinimumLikelihood() & 0xff) << 24);
out.writeUInt(write);
out.writeUByte((short) rmsMapQ);
}
@ -276,7 +272,7 @@ public abstract class GLFRecord {
}
public short getMinimumLikelihood() {
return minimumLikelihood;
return calculateMinLikelihood();
}
public int getReadDepth() {
@ -290,5 +286,12 @@ public abstract class GLFRecord {
public String getContig() {
return this.contig;
}
/**
 * Computes this record's minimum likelihood on demand. The GLF format stores the
 * minimum likelihood as an unsigned byte, so the on-disk value is capped at 255;
 * subclasses own the likelihood data and therefore decide how to handle an off-spec
 * value above 255 (e.g. by rescaling their likelihoods and warning the user) before
 * returning it.
 *
 * @return the minimum likelihood as a short, expected to fit the 0-255 range
 */
protected abstract short calculateMinLikelihood();
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec;
import org.broadinstitute.sting.utils.Utils;
/*
@ -39,7 +40,7 @@ public class GLFSingleCall extends GLFRecord {
// our likelihoods array
private double likelihoods[];
private double minLikelihood;
/**
* create a single
*
@ -51,7 +52,8 @@ public class GLFSingleCall extends GLFRecord {
* @param likelihoods the Likelihoods
*/
public GLFSingleCall(String contig, char refBase, int position, int readDepth, short rmsMapQ, double likelihoods[]) {
super(contig, refBase, position, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
super(contig, refBase, position, readDepth, rmsMapQ);
minLikelihood = GLFRecord.findMin(likelihoods);
this.likelihoods = likelihoods;
}
@ -64,8 +66,6 @@ public class GLFSingleCall extends GLFRecord {
void write(BinaryCodec out, GLFRecord lastRec) {
super.write(out, lastRec);
short[] adjusted = new short[likelihoods.length];
// find the minimum likelihood as a double
double minLikelihood = GLFRecord.findMin(likelihoods);
// we want to scale our values
for (int x = 0; x < likelihoods.length; x++) {
@ -98,7 +98,27 @@ public class GLFSingleCall extends GLFRecord {
return likelihoods.length + super.getByteSize();
}
/**
 * Returns the minimum likelihood, rescaling the stored likelihoods first if needed.
 * GLF stores the minimum likelihood as an unsigned byte, so a minimum above 255 is
 * off-spec; in that case every likelihood in this record is shifted down by the same
 * amount so the minimum lands exactly on 255, and the user is warned that the
 * original (pre-scaled) values are unrecoverable.
 *
 * NOTE(review): this getter mutates state (minLikelihood and likelihoods[]) on its
 * first off-spec call; later calls are no-ops because minLikelihood is then 255.
 *
 * @return the minimum likelihood as a short, capped to the 0-255 range
 */
@Override
protected short calculateMinLikelihood() {
if (minLikelihood > 255.0) {
// amount every likelihood must drop so the minimum becomes exactly 255
double scale = minLikelihood - 255.0;
this.minLikelihood = 255.0;
for (int x = 0; x < this.likelihoods.length; x++)
this.likelihoods[x] = this.likelihoods[x] - scale;
Utils.warnUser("GLFRecord: Locus " + this.getContig() + ":" + this.position + " had it's likelihood information scaled, the original likelihood values are unrecoverable");
}
return toCappedShort(minLikelihood);
}
public double[] getLikelihoods() {
return likelihoods;
}
}

View File

@ -45,7 +45,7 @@ public class GLFVariableLengthCall extends GLFRecord {
private int indelLen2 = 0;
private final short indelSeq1[];
private final short indelSeq2[];
private short minlikelihood;
// our size, which is immutable, in bytes
private final int size;
@ -57,7 +57,7 @@ public class GLFVariableLengthCall extends GLFRecord {
* @param refBase the reference base
* @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
@ -78,7 +78,7 @@ public class GLFVariableLengthCall extends GLFRecord {
final short indelSeq1[],
int indelTwoLength,
final short indelSeq2[]) {
super(contig, refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
super(contig, refBase, offset, readDepth, rmsMapQ);
this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
this.lkHet = GLFRecord.toCappedShort(lkHet);
@ -87,7 +87,7 @@ public class GLFVariableLengthCall extends GLFRecord {
this.indelSeq1 = indelSeq1;
this.indelSeq2 = indelSeq2;
size = 16 + indelSeq1.length + indelSeq2.length;
this.minlikelihood = GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet}));
}
/**
@ -120,6 +120,17 @@ public class GLFVariableLengthCall extends GLFRecord {
return size + super.getByteSize();
}
/**
 * Returns the minimum likelihood for this variable-length (indel) call.
 * Unlike GLFSingleCall, no rescaling is needed here: the value was already capped
 * to the GLF 0-255 range with toCappedShort() when the record was constructed.
 *
 * @return the minimum of the three indel likelihoods, as a short in the 0-255 range
 */
@Override
protected short calculateMinLikelihood() {
return minlikelihood;
}
public short getLkHom1() {
return lkHom1;
}

View File

@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOtherFormat() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "GLF", "d5d1c5ea8d42712d5509cd1c9c38359d" );
e.put( "GLF", "ddb1074b6f4a0fd1e15e4381476f1055" );
e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {

View File

@ -93,6 +93,23 @@ public class GLFWriterTest extends BaseTest {
return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk);
}
/**
 * Builds a fake genotype whose minimum (negative log) likelihood exceeds the GLF cap
 * of 255 — i.e. an off-spec record — so tests can exercise the rescaling path.
 *
 * @param bestGenotype index into the genotypes array of the best call
 * @param location     the genomic location to place the fake genotype at
 * @param ref          the reference base
 * @return a FakeGenotype whose likelihoods all lie beyond the 255 cap
 */
private FakeGenotype createGreaterThan255MinimumGenotype(int bestGenotype, GenomeLoc location, char ref) {
    double[] likelihoods = new double[GENOTYPE_COUNT];
    // give every genotype a distinct value, all well past the 255 cap
    for (int idx = 0; idx < likelihoods.length; idx++) {
        likelihoods[idx] = -355.0 - idx;
    }
    // make the chosen genotype clearly the best of the bunch
    likelihoods[bestGenotype] = -256.0;
    return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, likelihoods);
}
/**
* can we actually write a file?
@ -113,6 +130,24 @@ public class GLFWriterTest extends BaseTest {
}
/**
 * Can we write a GLF file when every record's minimum likelihood exceeds the
 * format's 255 cap? Exercises the off-spec rescaling path end-to-end through the
 * writer; passes if the file is written and closed without an exception.
 */
@Test
public void basicWriteGreaterMinimumLikelihood() {
File writeTo = new File("testGLF2.glf");
writeTo.deleteOnExit();
rec = new GLFWriter(writeTo);
((GLFWriter)rec).writeHeader(header);
// five consecutive loci, each with likelihoods beyond the 255 cap
for (int x = 0; x < 5; x++) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
Genotype type = createGreaterThan255MinimumGenotype(x % 10, loc, 'A');
rec.addGenotypeCall(type);
}
rec.close();
}
/**
* write a bunch of fake records a GLF file, and then read it back from the