From 8d1d37302c9162964a121231e5ee7ea3f1f803d7 Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 20 Jan 2010 19:36:49 +0000 Subject: [PATCH] a quick change to GLF to keep as much precision in our likelihoods as long as possible, before we put it into byte space. Sanger was doing a diff at low coverage and noticed our calls didn't contain as much precision as theirs. Updated the MD5 for unified genotyper output. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2644 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/genotype/glf/GLFReader.java | 2 +- .../sting/utils/genotype/glf/GLFRecord.java | 10 ++++------ .../sting/utils/genotype/glf/GLFSingleCall.java | 7 +++++-- .../genotyper/UnifiedGenotyperIntegrationTest.java | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java index 29bcea641..44239d9d2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java @@ -108,7 +108,7 @@ public class GLFReader implements Iterator { short rmsMapping = inputBinaryCodec.readUByte(); double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length]; for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { - lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk); + lkValues[x] = ((double)inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + (double)min_lk); } return new GLFSingleCall(referenceName, refBase, (int)(offset+currentLocation), readDepth, rmsMapping, lkValues); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index 2677cae66..3089aae3c 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -172,13 +172,12 @@ public abstract class GLFRecord { * * @param chromosome the reference contig, as a String * @param base the reference base in the reference, as a REF_BASE - * @param offset the offset from the last call * @param position the distance from the beginning of the reference seq * @param minimumLikelihood it's minimum likelihood * @param readDepth the read depth at this position * @param rmsMapQ the root mean square of the mapping quality */ - private void validateInput(String chromosome, REF_BASE base, long position, short minimumLikelihood, int readDepth, short rmsMapQ) { + private void validateInput(String chromosome, REF_BASE base, long position, double minimumLikelihood, int readDepth, short rmsMapQ) { // add any validation to the contig string here this.contig = chromosome; @@ -189,11 +188,10 @@ public abstract class GLFRecord { } this.position = position; - if (minimumLikelihood > 255 || minimumLikelihood < 0) { throw new IllegalArgumentException("minimumLikelihood is out of bounds (0 to 0xffffffff) value passed = " + minimumLikelihood); } - this.minimumLikelihood = minimumLikelihood; + this.minimumLikelihood = GLFRecord.toCappedShort(minimumLikelihood); if (readDepth > 16777215 || readDepth < 0) { throw new IllegalArgumentException("readDepth is out of bounds (0 to 0xffffff) value passed = " + readDepth); @@ -259,14 +257,14 @@ public abstract class GLFRecord { * * @return */ - protected static short findMin(double vals[]) { + protected static double findMin(double vals[]) { if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in"); double min = vals[0]; for (double d : vals) if (d < min) min = d; - return GLFRecord.toCappedShort(min); + return min; } public REF_BASE getRefBase() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFSingleCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFSingleCall.java index 38a0ab147..e1f880d24 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFSingleCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFSingleCall.java @@ -37,7 +37,7 @@ import net.sf.samtools.util.BinaryCodec; */ public class GLFSingleCall extends GLFRecord { - // our likelihoods object + // our likelihoods array private double likelihoods[]; /** @@ -64,9 +64,12 @@ public class GLFSingleCall extends GLFRecord { void write(BinaryCodec out, GLFRecord lastRec) { super.write(out, lastRec); short[] adjusted = new short[likelihoods.length]; + // find the minimum likelihood as a double + double minLikelihood = GLFRecord.findMin(likelihoods); + // we want to scale our values for (int x = 0; x < likelihoods.length; x++) { - adjusted[x] = GLFRecord.toCappedShort(LIKELIHOOD_SCALE_FACTOR * (Math.round(likelihoods[x]) - this.minimumLikelihood)); + adjusted[x] = GLFRecord.toCappedShort(Math.round(LIKELIHOOD_SCALE_FACTOR * (likelihoods[x] - minLikelihood))); } try { for (short value : adjusted) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 5c773ccd5..c549fbe5f 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -129,7 +129,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOtherFormat() { HashMap e = new HashMap(); - e.put( "GLF", "cc1782d734e0a02fef00900a6db0e550" ); + e.put( "GLF", "d5d1c5ea8d42712d5509cd1c9c38359d" ); e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" ); for ( Map.Entry entry : e.entrySet() ) {