adding code to deal with the off-spec situation where our minimum likelihood is above the GLF max of 255.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2871 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
88d0677379
commit
5546aa4416
|
|
@ -47,7 +47,6 @@ public abstract class GLFRecord {
|
|||
protected String contig;
|
||||
protected REF_BASE refBase;
|
||||
protected long position = 1;
|
||||
protected short minimumLikelihood = 0;
|
||||
protected int readDepth = 0;
|
||||
protected short rmsMapQ = 0;
|
||||
|
||||
|
|
@ -126,7 +125,7 @@ public abstract class GLFRecord {
|
|||
SINGLE((short) 1),
|
||||
VARIABLE((short) 2);
|
||||
|
||||
private final short fieldValue; // in kilograms
|
||||
private final short fieldValue; // a place to store the type
|
||||
|
||||
RECORD_TYPE(short value) {
|
||||
fieldValue = value;
|
||||
|
|
@ -144,13 +143,12 @@ public abstract class GLFRecord {
|
|||
* @param contig the contig string
|
||||
* @param base the reference base in the reference
|
||||
* @param position the distance from the beginning of the reference seq
|
||||
* @param minimumLikelihood its minimum likelihood
|
||||
* @param readDepth the read depth at this position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
*/
|
||||
public GLFRecord(String contig, char base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
public GLFRecord(String contig, char base, long position, int readDepth, short rmsMapQ) {
|
||||
REF_BASE newBase = REF_BASE.toBase(base);
|
||||
validateInput(contig, newBase, position, minimumLikelihood, readDepth, rmsMapQ);
|
||||
validateInput(contig, newBase, position, readDepth, rmsMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -159,12 +157,11 @@ public abstract class GLFRecord {
|
|||
* @param contig the contig string
|
||||
* @param base the reference base in the reference
|
||||
* @param position the distance from the beginning of the reference seq
|
||||
* @param minimumLikelihood its minimum likelihood
|
||||
* @param readDepth the read depth at this position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
*/
|
||||
GLFRecord(String contig, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
validateInput(contig, base, position, minimumLikelihood, readDepth, rmsMapQ);
|
||||
GLFRecord(String contig, REF_BASE base, long position, int readDepth, short rmsMapQ) {
|
||||
validateInput(contig, base, position, readDepth, rmsMapQ);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -173,11 +170,10 @@ public abstract class GLFRecord {
|
|||
* @param chromosome the reference contig, as a String
|
||||
* @param base the reference base in the reference, as a REF_BASE
|
||||
* @param position the distance from the beginning of the reference seq
|
||||
* @param minimumLikelihood its minimum likelihood
|
||||
* @param readDepth the read depth at this position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
*/
|
||||
private void validateInput(String chromosome, REF_BASE base, long position, double minimumLikelihood, int readDepth, short rmsMapQ) {
|
||||
private void validateInput(String chromosome, REF_BASE base, long position, int readDepth, short rmsMapQ) {
|
||||
// add any validation to the contig string here
|
||||
this.contig = chromosome;
|
||||
|
||||
|
|
@ -188,10 +184,10 @@ public abstract class GLFRecord {
|
|||
}
|
||||
this.position = position;
|
||||
|
||||
if (minimumLikelihood > 255 || minimumLikelihood < 0) {
|
||||
throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
|
||||
}
|
||||
this.minimumLikelihood = GLFRecord.toCappedShort(minimumLikelihood);
|
||||
// if (minimumLikelihood > 255 || minimumLikelihood < 0) {
|
||||
// throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
|
||||
// }
|
||||
// this.minimumLikelihood = GLFRecord.toCappedShort(minimumLikelihood);
|
||||
|
||||
if (readDepth > 16777215 || readDepth < 0) {
|
||||
throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth);
|
||||
|
|
@ -218,7 +214,7 @@ public abstract class GLFRecord {
|
|||
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUInt(((Long) (offset)).intValue()); // we have to subtract one, we're an offset
|
||||
long write = (long) ((long) (readDepth & 0xffffff) | (long) (this.minimumLikelihood & 0xff) << 24);
|
||||
long write = (long) ((long) (readDepth & 0xffffff) | (long) (this.getMinimumLikelihood() & 0xff) << 24);
|
||||
out.writeUInt(write);
|
||||
out.writeUByte((short) rmsMapQ);
|
||||
}
|
||||
|
|
@ -276,7 +272,7 @@ public abstract class GLFRecord {
|
|||
}
|
||||
|
||||
public short getMinimumLikelihood() {
|
||||
return minimumLikelihood;
|
||||
return calculateMinLikelihood();
|
||||
}
|
||||
|
||||
public int getReadDepth() {
|
||||
|
|
@ -290,5 +286,12 @@ public abstract class GLFRecord {
|
|||
public String getContig() {
|
||||
return this.contig;
|
||||
}
|
||||
|
||||
/**
|
||||
* this method had to be abstracted so that the underlying records could set the minimum likelihood (ML) in the event
|
||||
* that the ML is above 255. In this case the records need to scale the value appropriately, and warn the users.
|
||||
* @return a short of the minimum likelihood.
|
||||
*/
|
||||
protected abstract short calculateMinLikelihood();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import net.sf.samtools.util.BinaryCodec;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -39,7 +40,7 @@ public class GLFSingleCall extends GLFRecord {
|
|||
|
||||
// our likelihoods array
|
||||
private double likelihoods[];
|
||||
|
||||
private double minLikelihood;
|
||||
/**
|
||||
* create a single
|
||||
*
|
||||
|
|
@ -51,7 +52,8 @@ public class GLFSingleCall extends GLFRecord {
|
|||
* @param likelihoods the Likelihoods
|
||||
*/
|
||||
public GLFSingleCall(String contig, char refBase, int position, int readDepth, short rmsMapQ, double likelihoods[]) {
|
||||
super(contig, refBase, position, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
|
||||
super(contig, refBase, position, readDepth, rmsMapQ);
|
||||
minLikelihood = GLFRecord.findMin(likelihoods);
|
||||
this.likelihoods = likelihoods;
|
||||
}
|
||||
|
||||
|
|
@ -64,8 +66,6 @@ public class GLFSingleCall extends GLFRecord {
|
|||
void write(BinaryCodec out, GLFRecord lastRec) {
|
||||
super.write(out, lastRec);
|
||||
short[] adjusted = new short[likelihoods.length];
|
||||
// find the minimum likelihood as a double
|
||||
double minLikelihood = GLFRecord.findMin(likelihoods);
|
||||
|
||||
// we want to scale our values
|
||||
for (int x = 0; x < likelihoods.length; x++) {
|
||||
|
|
@ -98,7 +98,27 @@ public class GLFSingleCall extends GLFRecord {
|
|||
return likelihoods.length + super.getByteSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* this method had to be abstracted so that the underlying records could set the minimum likelihood (ML) in the event
|
||||
* that the ML is above 255. In this case the records need to scale their likelihood values appropriately, and warn the user.
|
||||
*
|
||||
* @return a short of the minimum likelihood.
|
||||
*/
|
||||
@Override
|
||||
protected short calculateMinLikelihood() {
|
||||
if (minLikelihood > 255.0) {
|
||||
double scale = minLikelihood - 255.0;
|
||||
this.minLikelihood = 255.0;
|
||||
for (int x = 0; x < this.likelihoods.length; x++)
|
||||
this.likelihoods[x] = this.likelihoods[x] - scale;
|
||||
Utils.warnUser("GLFRecord: Locus " + this.getContig() + ":" + this.position + " had it's likelihood information scaled, the original likelihood values are unrecoverable");
|
||||
}
|
||||
return toCappedShort(minLikelihood);
|
||||
}
|
||||
|
||||
public double[] getLikelihoods() {
|
||||
return likelihoods;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GLFVariableLengthCall extends GLFRecord {
|
|||
private int indelLen2 = 0;
|
||||
private final short indelSeq1[];
|
||||
private final short indelSeq2[];
|
||||
|
||||
private short minlikelihood;
|
||||
// our size, which is immutable, in bytes
|
||||
private final int size;
|
||||
|
||||
|
|
@ -57,7 +57,7 @@ public class GLFVariableLengthCall extends GLFRecord {
|
|||
* @param refBase the reference base
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
* @param offset the location, as an offset from the previous glf record
|
||||
|
||||
s
|
||||
* @param readDepth the read depth at the specified position
|
||||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param lkHom1 the negative log likelihood of the first homozygous indel allele, from 0 to 255
|
||||
|
|
@ -78,7 +78,7 @@ public class GLFVariableLengthCall extends GLFRecord {
|
|||
final short indelSeq1[],
|
||||
int indelTwoLength,
|
||||
final short indelSeq2[]) {
|
||||
super(contig, refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
|
||||
super(contig, refBase, offset, readDepth, rmsMapQ);
|
||||
this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
|
||||
this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
|
||||
this.lkHet = GLFRecord.toCappedShort(lkHet);
|
||||
|
|
@ -87,7 +87,7 @@ public class GLFVariableLengthCall extends GLFRecord {
|
|||
this.indelSeq1 = indelSeq1;
|
||||
this.indelSeq2 = indelSeq2;
|
||||
size = 16 + indelSeq1.length + indelSeq2.length;
|
||||
|
||||
this.minlikelihood = GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet}));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -120,6 +120,17 @@ public class GLFVariableLengthCall extends GLFRecord {
|
|||
return size + super.getByteSize();
|
||||
}
|
||||
|
||||
/**
|
||||
* this method had to be abstracted so that the underlying records could set the minimum likelihood (ML) in the event
|
||||
* that the ML is above 255. In this case the records need to scale the value appropriately, and warn the users.
|
||||
*
|
||||
* @return a short of the minimum likelihood.
|
||||
*/
|
||||
@Override
|
||||
protected short calculateMinLikelihood() {
|
||||
return minlikelihood;
|
||||
}
|
||||
|
||||
public short getLkHom1() {
|
||||
return lkHom1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testOtherFormat() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "GLF", "d5d1c5ea8d42712d5509cd1c9c38359d" );
|
||||
e.put( "GLF", "ddb1074b6f4a0fd1e15e4381476f1055" );
|
||||
e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
|
|
|
|||
|
|
@ -93,6 +93,23 @@ public class GLFWriterTest extends BaseTest {
|
|||
return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake genotype with a minimum likelihood greater than 255
|
||||
* @param bestGenotype the best genotype, as an index into the array of values
|
||||
* @param location the location we're creating the genotype at
|
||||
* @param ref the reference base
|
||||
* @return a FakeGenotype (a fake genotype)
|
||||
*/
|
||||
private FakeGenotype createGreaterThan255MinimumGenotype(int bestGenotype, GenomeLoc location, char ref) {
|
||||
double lk[] = new double[GENOTYPE_COUNT];
|
||||
for (int x = 0; x < GENOTYPE_COUNT; x++) {
|
||||
lk[x] = -355.0 - (double) x; // they'll all be unique like a snowflake
|
||||
}
|
||||
lk[bestGenotype] = -256.0; // lets make the best way better
|
||||
|
||||
return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* can we actually write a file?
|
||||
|
|
@ -113,6 +130,24 @@ public class GLFWriterTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* can we actually write a file?
|
||||
*/
|
||||
@Test
|
||||
public void basicWriteGreaterMinimumLikelihood() {
|
||||
File writeTo = new File("testGLF2.glf");
|
||||
writeTo.deleteOnExit();
|
||||
|
||||
rec = new GLFWriter(writeTo);
|
||||
((GLFWriter)rec).writeHeader(header);
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
|
||||
Genotype type = createGreaterThan255MinimumGenotype(x % 10, loc, 'A');
|
||||
rec.addGenotypeCall(type);
|
||||
}
|
||||
rec.close();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* write a bunch of fake records a GLF file, and then read it back from the
|
||||
|
|
|
|||
Loading…
Reference in New Issue