a quick change to GLF to keep as much precision in our likelihoods as long as possible, before we put it into byte space. Sanger was doing a diff at low coverage and noticed our calls didn't contain as much precision as theirs. Updated the MD5 for unified genotyper output.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2644 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-01-20 19:36:49 +00:00
parent 908d399670
commit 8d1d37302c
4 changed files with 11 additions and 10 deletions

View File

@ -108,7 +108,7 @@ public class GLFReader implements Iterator<GLFRecord> {
short rmsMapping = inputBinaryCodec.readUByte(); short rmsMapping = inputBinaryCodec.readUByte();
double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length]; double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length];
for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) {
lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk); lkValues[x] = ((double)inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + (double)min_lk);
} }
return new GLFSingleCall(referenceName, refBase, (int)(offset+currentLocation), readDepth, rmsMapping, lkValues); return new GLFSingleCall(referenceName, refBase, (int)(offset+currentLocation), readDepth, rmsMapping, lkValues);
} }

View File

@ -172,13 +172,12 @@ public abstract class GLFRecord {
* *
* @param chromosome the reference contig, as a String * @param chromosome the reference contig, as a String
* @param base the reference base in the reference, as a REF_BASE * @param base the reference base in the reference, as a REF_BASE
* @param offset the offset from the last call
* @param position the distance from the beginning of the reference seq * @param position the distance from the beginning of the reference seq
* @param minimumLikelihood it's minimum likelihood * @param minimumLikelihood it's minimum likelihood
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
private void validateInput(String chromosome, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) { private void validateInput(String chromosome, REF_BASE base, long position, double minimumLikelihood, int readDepth, short rmsMapQ) {
// add any validation to the contig string here // add any validation to the contig string here
this.contig = chromosome; this.contig = chromosome;
@ -189,11 +188,10 @@ public abstract class GLFRecord {
} }
this.position = position; this.position = position;
if (minimumLikelihood > 255 || minimumLikelihood < 0) { if (minimumLikelihood > 255 || minimumLikelihood < 0) {
throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood); throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood);
} }
this.minimumLikelihood = minimumLikelihood; this.minimumLikelihood = GLFRecord.toCappedShort(minimumLikelihood);
if (readDepth > 16777215 || readDepth < 0) { if (readDepth > 16777215 || readDepth < 0) {
throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth); throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth);
@ -259,14 +257,14 @@ public abstract class GLFRecord {
* *
* @return * @return
*/ */
protected static short findMin(double vals[]) { protected static double findMin(double vals[]) {
if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in"); if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in");
double min = vals[0]; double min = vals[0];
for (double d : vals) for (double d : vals)
if (d < min) min = d; if (d < min) min = d;
return GLFRecord.toCappedShort(min); return min;
} }
public REF_BASE getRefBase() { public REF_BASE getRefBase() {

View File

@ -37,7 +37,7 @@ import net.sf.samtools.util.BinaryCodec;
*/ */
public class GLFSingleCall extends GLFRecord { public class GLFSingleCall extends GLFRecord {
// our likelihoods object // our likelihoods array
private double likelihoods[]; private double likelihoods[];
/** /**
@ -64,9 +64,12 @@ public class GLFSingleCall extends GLFRecord {
void write(BinaryCodec out, GLFRecord lastRec) { void write(BinaryCodec out, GLFRecord lastRec) {
super.write(out, lastRec); super.write(out, lastRec);
short[] adjusted = new short[likelihoods.length]; short[] adjusted = new short[likelihoods.length];
// find the minimum likelihood as a double
double minLikelihood = GLFRecord.findMin(likelihoods);
// we want to scale our values // we want to scale our values
for (int x = 0; x < likelihoods.length; x++) { for (int x = 0; x < likelihoods.length; x++) {
adjusted[x] = GLFRecord.toCappedShort(LIKELIHOOD_SCALE_FACTOR * (Math.round(likelihoods[x]) - this.minimumLikelihood)); adjusted[x] = GLFRecord.toCappedShort(Math.round(LIKELIHOOD_SCALE_FACTOR * (likelihoods[x] - minLikelihood)));
} }
try { try {
for (short value : adjusted) { for (short value : adjusted) {

View File

@ -129,7 +129,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testOtherFormat() { public void testOtherFormat() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();
e.put( "GLF", "cc1782d734e0a02fef00900a6db0e550" ); e.put( "GLF", "d5d1c5ea8d42712d5509cd1c9c38359d" );
e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" ); e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" );
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {