diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java index be6336c8d..74d51053b 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java @@ -18,7 +18,7 @@ import java.util.List; * The single sample implementation of the genotype interface, which contains * extra information for the various genotype outputs */ -public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked { +public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked { private final char mRefBase; private final GenotypeLikelihoods mGenotypeLikelihoods; @@ -279,11 +279,19 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li * @return */ public double[] getLikelihoods() { - // TODO: this is wrong, obviously, but we've kept it for now to stay backward compatible with previous calls - return this.mGenotypeLikelihoods.getPosteriors(); + return this.mGenotypeLikelihoods.getLikelihoods(); } + /** + * get the likelihood information for this + * + * @return + */ + @Override + public double[] getPosteriors() { + return this.mGenotypeLikelihoods.getPosteriors(); + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index ebd3cfa95..5d5c3ea04 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -110,7 +110,7 @@ public class CoverageEvalWalker extends LocusWalker, String> { // a method to support getting the geli string, since the AlleleFrequencyEstimate is going away public String toGeliString (Genotype locus) { - double likelihoods[]; + double posteriors[]; int readDepth = -1; double nextVrsBest = 0; double nextVrsRef = 0; @@ -121,17 +121,17 @@ public class CoverageEvalWalker extends LocusWalker, String> { readDepth = ((ReadBacked)locus).getReadCount(); } if (!(locus instanceof GenotypesBacked)) { - likelihoods = new double[10]; - Arrays.fill(likelihoods, 0.0); + posteriors = new double[10]; + Arrays.fill(posteriors, 0.0); } else { - likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + posteriors = ((PosteriorsBacked) locus).getPosteriors(); double[] lks; - lks = Arrays.copyOf(likelihoods,likelihoods.length); + lks = Arrays.copyOf(posteriors,posteriors.length); Arrays.sort(lks); nextVrsBest = lks[9] - lks[8]; if (ref != 'X') { int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); - nextVrsRef = lks[9] - likelihoods[index]; + nextVrsRef = lks[9] - posteriors[index]; } } // we have to calcuate our own @@ -145,16 +145,16 @@ public class CoverageEvalWalker extends LocusWalker, String> { locus.getBases(), nextVrsRef, nextVrsBest, - likelihoods[0], - likelihoods[1], - likelihoods[2], - likelihoods[3], - likelihoods[4], - likelihoods[5], - likelihoods[6], - likelihoods[7], - likelihoods[8], - likelihoods[9])); + posteriors[0], + posteriors[1], + posteriors[2], + posteriors[3], + posteriors[4], + posteriors[5], + posteriors[6], + posteriors[7], + posteriors[8], + posteriors[9])); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java new file mode 100644 index 000000000..3fa9b59ff --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author aaron + *

+ * Interface PosteriorsBacked + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public interface PosteriorsBacked { + + /** + * get the likelihood information for this + * @return + */ + public double[] getPosteriors(); +} + diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 0155c0308..2a6a26750 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -98,15 +98,15 @@ public class GeliAdapter implements GenotypeWriter { */ @Override public void addGenotypeCall(Genotype locus) { - double likelihoods[]; + double posteriors[]; int readDepth = -1; double nextVrsBest = 0; double nextVrsRef = 0; if (!(locus instanceof GenotypesBacked)) { - likelihoods = new double[10]; - Arrays.fill(likelihoods, Double.MIN_VALUE); + posteriors = new double[10]; + Arrays.fill(posteriors, Double.MIN_VALUE); } else { - likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + posteriors = ((PosteriorsBacked) locus).getPosteriors(); } char ref = locus.getReference(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index eb562fda0..cdb9b766c 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -50,7 +50,7 @@ public class GeliTextWriter implements GenotypeWriter { * @param locus the locus to add */ public void addGenotypeCall(Genotype locus) { - double likelihoods[]; + double posteriors[]; int readDepth = -1; double nextVrsBest = 0; double nextVrsRef = 0; @@ -61,17 +61,17 @@ public class GeliTextWriter implements GenotypeWriter { readDepth = ((ReadBacked)locus).getReadCount(); } if (!(locus instanceof GenotypesBacked)) { - likelihoods = new double[10]; - Arrays.fill(likelihoods, 0.0); + posteriors = new double[10]; + Arrays.fill(posteriors, 0.0); } else { - likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + posteriors = ((PosteriorsBacked) locus).getPosteriors(); double[] lks; - lks = Arrays.copyOf(likelihoods,likelihoods.length); + lks = Arrays.copyOf(posteriors,posteriors.length); Arrays.sort(lks); nextVrsBest = lks[9] - lks[8]; if (ref != 'X') { int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); - nextVrsRef = lks[9] - likelihoods[index]; + nextVrsRef = lks[9] - posteriors[index]; } } // we have to calcuate our own @@ -85,16 +85,16 @@ public class GeliTextWriter implements GenotypeWriter { locus.getBases(), nextVrsRef, nextVrsBest, - likelihoods[0], - likelihoods[1], - likelihoods[2], - likelihoods[3], - likelihoods[4], - likelihoods[5], - likelihoods[6], - likelihoods[7], - likelihoods[8], - likelihoods[9])); + posteriors[0], + posteriors[1], + posteriors[2], + posteriors[3], + posteriors[4], + posteriors[5], + posteriors[6], + posteriors[7], + posteriors[8], + posteriors[9])); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java index 219c34aec..434352850 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFReader.java @@ -108,7 +108,7 @@ public class GLFReader implements Iterator { short rmsMapping = inputBinaryCodec.readUByte(); double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length]; for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { - lkValues[x] = inputBinaryCodec.readUByte(); + lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk); } return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index 6a1fed8cb..1ae61fbc8 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.StingException; * field values. */ public abstract class GLFRecord { - public final static double LIKELIHOOD_SCALE_FACTOR = 1; + public final static double LIKELIHOOD_SCALE_FACTOR = 10; // fields common to all records @@ -247,9 +247,9 @@ public abstract class GLFRecord { if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in"); double min = vals[0]; - for (double d : vals) { + for (double d : vals) if (d < min) min = d; - } + return GLFRecord.toCappedShort(min); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 4deee697b..9f6e096c8 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -113,7 +113,7 @@ public class GLFWriter implements GenotypeWriter { // get likelihood information if available LikelihoodObject obj; - if (locus instanceof GenotypesBacked) { + if (locus instanceof LikelihoodsBacked) { obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); } else { double values[] = new double[10]; @@ -122,12 +122,14 @@ public class GLFWriter implements GenotypeWriter { } obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods - double rms = -1; + double rms = 0; + int readCount = 0; // calculate the RMS mapping qualities and the read depth if (locus instanceof ReadBacked) { rms = calculateRMS(((ReadBacked)locus).getReads()); + readCount = ((ReadBacked)locus).getReadCount(); } - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,((ReadBacked)locus).getReadCount(),obj); + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,readCount,obj); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java index 03d9d34db..83dac4998 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java @@ -62,10 +62,15 @@ public class SinglePointCall extends GLFRecord { */ void write(BinaryCodec out) { super.write(out); - try { - for (double likelihood : likelihoods) { - out.writeUByte(GLFRecord.toCappedShort(likelihood)); + short[] adjusted = new short[likelihoods.length]; + // we want to scale our values + for (int x = 0; x < likelihoods.length; x++) { + adjusted[x] = GLFRecord.toCappedShort(LIKELIHOOD_SCALE_FACTOR * (Math.round(likelihoods[x]) - this.minimumLikelihood)); } + try { + for (short value : adjusted) { + out.writeUByte(value); + } } catch (Exception e) { e.printStackTrace(); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index ecd1df64b..62d4c0200 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -1,12 +1,23 @@ package org.broadinstitute.sting.utils.genotype.glf; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.genotype.BasicGenotype; +import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.utils.genotype.LikelihoodsBacked; +import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import java.io.File; +import java.io.FileNotFoundException; +import java.util.ArrayList; +import java.util.List; /* @@ -47,51 +58,142 @@ public class GLFWriterTest extends BaseTest { private final String header = ""; private final String referenceSequenceName = "chr1"; private final int refLength = 1000; - File writeTo = new File("testGLF.glf"); - + private static final int GENOTYPE_COUNT = 10; private GenotypeWriter rec; + protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"}; + private static IndexedFastaSequenceFile seq; + protected final static double SIGNIFICANCE = 5.1; @Before public void before() { } - /** - * make a fake snp - * - * @param genotype the genotype, 0-15 (AA, AT, AA, ... GG) - */ - private void addFakeSNP( int genotype, int location ) { - LikelihoodObject obj = new LikelihoodObject(); - obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype], 128); - int ran = (int) Math.floor(Math.random() * 4.0); - char let = 'A'; - switch (ran) { - case 0: - let = 'T'; - break; - case 1: - let = 'C'; - break; - case 2: - let = 'G'; - break; + @BeforeClass + public static void beforeTests() { + try { + seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta")); + } catch (FileNotFoundException e) { + throw new StingException("unable to load the sequence dictionary"); } - /*try { - rec.addPointCall(let, location, 10, (short) 10, obj); - } catch (IllegalArgumentException e) { - e.printStackTrace(); - } */ + GenomeLocParser.setupRefContigOrdering(seq); + + } + + + private FakeGenotype createGenotype(int bestGenotype, GenomeLoc location, char ref) { + double lk[] = new double[GENOTYPE_COUNT]; + for (int x = 0; x < GENOTYPE_COUNT; x++) { + lk[x] = -15.0 - (double) x; // they'll all be unique like a snowflake + } + lk[bestGenotype] = -10.0; // lets make the best way better + + return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk); + } + + @Test + public void basicWrite() { + File writeTo = new File("testGLF.glf"); + writeTo.deleteOnExit(); + + rec = new GLFWriter(header, writeTo); + for (int x = 0; x < 100; x++) { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + Genotype type = createGenotype(x % 10, loc, 'A'); + rec.addGenotypeCall(type); + } + rec.close(); + } @Test - public void basicWrite() { + public void basicWriteThenRead() { + File writeTo = new File("testGLF2.glf"); + //writeTo.deleteOnExit(); + List types = new ArrayList(); rec = new GLFWriter(header, writeTo); for (int x = 0; x < 100; x++) { - addFakeSNP((int) Math.round(Math.random() * 9), 1); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1); + FakeGenotype type = createGenotype(x % 10, loc, 'A'); + types.add(type); + rec.addGenotypeCall(type); } rec.close(); + GLFReader reader = new GLFReader(writeTo); + int count = 0; + while (reader.hasNext()) { + GLFRecord rec = reader.next(); + Assert.assertTrue(types.get(count).compareTo(FakeGenotype.toFakeGenotype((SinglePointCall) rec, reader.getReferenceName(), reader.getCurrentLocation())) == 0); + count++; + } } + } + +class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparable { + + private double[] likelihoods; + + /** + * create a basic genotype, given the following fields + * + * @param location the genomic location + * @param genotype the genotype, as a string, where ploidy = string.length + * @param ref the reference base as a char + * @param negLog10PError the confidence score + */ + public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) { + super(location, genotype, ref, negLog10PError); + this.likelihoods = likelihoods; + } + + /** + * get the likelihood information for this + * + * @return + */ + @Override + public double[] getLikelihoods() { + return likelihoods; + } + + + @Override + public int compareTo(FakeGenotype that) { + if (this.getLocation().compareTo(that.getLocation()) != 0) { + System.err.println("Location's aren't equal; this = " + this.getLocation() + " that = " + that.getLocation()); + return this.getLocation().compareTo(that.getLocation()); + } + if (!this.getBases().equals(that.getBases())) { + System.err.println("getBases's aren't equal; this = " + this.getBases() + " that = " + that.getBases()); + return -1; + } + for (int x = 0; x < this.likelihoods.length; x++) { + if (this.likelihoods[x] != that.getLikelihoods()[x]) { + System.err.println("likelihoods' aren't equal; this = " + this.likelihoods[x] + " that = " + that.getLikelihoods()[x]); + return -1; + } + } + return 0; + } + + public static FakeGenotype toFakeGenotype(SinglePointCall record, String contig, int postition) { + double likelihoods[] = record.getLikelihoods(); + char ref = record.getRefBase().toChar(); + double significance = GLFWriterTest.SIGNIFICANCE; + int minIndex = 0; + for (int i = 0; i < likelihoods.length; i++) { + if (likelihoods[i] < likelihoods[minIndex]) minIndex = i; + } + for (int i = 0; i < likelihoods.length; i++) { + likelihoods[i] = likelihoods[i] * -1; + } + + String genotype = GLFWriterTest.genotypes[minIndex]; + GenomeLoc loc = GenomeLocParser.createGenomeLoc(contig, postition); + return new FakeGenotype(loc, genotype, ref, significance, likelihoods); + } + +} \ No newline at end of file