changes for corrected GLF likelihood output, along with better tests
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1754 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2309d19f6f
commit
e885cc4b21
|
|
@ -18,7 +18,7 @@ import java.util.List;
|
|||
* The single sample implementation of the genotype interface, which contains
|
||||
* extra information for the various genotype outputs
|
||||
*/
|
||||
public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked {
|
||||
public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked {
|
||||
private final char mRefBase;
|
||||
private final GenotypeLikelihoods mGenotypeLikelihoods;
|
||||
|
||||
|
|
@ -279,11 +279,19 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
|||
* @return
|
||||
*/
|
||||
public double[] getLikelihoods() {
|
||||
// TODO: this is wrong, obviously, but we've kept it for now to stay backward compatible with previous calls
|
||||
return this.mGenotypeLikelihoods.getPosteriors();
|
||||
return this.mGenotypeLikelihoods.getLikelihoods();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the likelihood information for this
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double[] getPosteriors() {
|
||||
return this.mGenotypeLikelihoods.getPosteriors();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
|
||||
// a method to support getting the geli string, since the AlleleFrequencyEstimate is going away
|
||||
public String toGeliString (Genotype locus) {
|
||||
double likelihoods[];
|
||||
double posteriors[];
|
||||
int readDepth = -1;
|
||||
double nextVrsBest = 0;
|
||||
double nextVrsRef = 0;
|
||||
|
|
@ -121,17 +121,17 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
readDepth = ((ReadBacked)locus).getReadCount();
|
||||
}
|
||||
if (!(locus instanceof GenotypesBacked)) {
|
||||
likelihoods = new double[10];
|
||||
Arrays.fill(likelihoods, 0.0);
|
||||
posteriors = new double[10];
|
||||
Arrays.fill(posteriors, 0.0);
|
||||
} else {
|
||||
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
|
||||
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
||||
double[] lks;
|
||||
lks = Arrays.copyOf(likelihoods,likelihoods.length);
|
||||
lks = Arrays.copyOf(posteriors,posteriors.length);
|
||||
Arrays.sort(lks);
|
||||
nextVrsBest = lks[9] - lks[8];
|
||||
if (ref != 'X') {
|
||||
int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
|
||||
nextVrsRef = lks[9] - likelihoods[index];
|
||||
nextVrsRef = lks[9] - posteriors[index];
|
||||
}
|
||||
}
|
||||
// we have to calcuate our own
|
||||
|
|
@ -145,16 +145,16 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
locus.getBases(),
|
||||
nextVrsRef,
|
||||
nextVrsBest,
|
||||
likelihoods[0],
|
||||
likelihoods[1],
|
||||
likelihoods[2],
|
||||
likelihoods[3],
|
||||
likelihoods[4],
|
||||
likelihoods[5],
|
||||
likelihoods[6],
|
||||
likelihoods[7],
|
||||
likelihoods[8],
|
||||
likelihoods[9]));
|
||||
posteriors[0],
|
||||
posteriors[1],
|
||||
posteriors[2],
|
||||
posteriors[3],
|
||||
posteriors[4],
|
||||
posteriors[5],
|
||||
posteriors[6],
|
||||
posteriors[7],
|
||||
posteriors[8],
|
||||
posteriors[9]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface PosteriorsBacked
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public interface PosteriorsBacked {
|
||||
|
||||
/**
|
||||
* get the likelihood information for this
|
||||
* @return
|
||||
*/
|
||||
public double[] getPosteriors();
|
||||
}
|
||||
|
||||
|
|
@ -98,15 +98,15 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(Genotype locus) {
|
||||
double likelihoods[];
|
||||
double posteriors[];
|
||||
int readDepth = -1;
|
||||
double nextVrsBest = 0;
|
||||
double nextVrsRef = 0;
|
||||
if (!(locus instanceof GenotypesBacked)) {
|
||||
likelihoods = new double[10];
|
||||
Arrays.fill(likelihoods, Double.MIN_VALUE);
|
||||
posteriors = new double[10];
|
||||
Arrays.fill(posteriors, Double.MIN_VALUE);
|
||||
} else {
|
||||
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
|
||||
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
||||
|
||||
}
|
||||
char ref = locus.getReference();
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
* @param locus the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(Genotype locus) {
|
||||
double likelihoods[];
|
||||
double posteriors[];
|
||||
int readDepth = -1;
|
||||
double nextVrsBest = 0;
|
||||
double nextVrsRef = 0;
|
||||
|
|
@ -61,17 +61,17 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
readDepth = ((ReadBacked)locus).getReadCount();
|
||||
}
|
||||
if (!(locus instanceof GenotypesBacked)) {
|
||||
likelihoods = new double[10];
|
||||
Arrays.fill(likelihoods, 0.0);
|
||||
posteriors = new double[10];
|
||||
Arrays.fill(posteriors, 0.0);
|
||||
} else {
|
||||
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
|
||||
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
||||
double[] lks;
|
||||
lks = Arrays.copyOf(likelihoods,likelihoods.length);
|
||||
lks = Arrays.copyOf(posteriors,posteriors.length);
|
||||
Arrays.sort(lks);
|
||||
nextVrsBest = lks[9] - lks[8];
|
||||
if (ref != 'X') {
|
||||
int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
|
||||
nextVrsRef = lks[9] - likelihoods[index];
|
||||
nextVrsRef = lks[9] - posteriors[index];
|
||||
}
|
||||
}
|
||||
// we have to calcuate our own
|
||||
|
|
@ -85,16 +85,16 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
locus.getBases(),
|
||||
nextVrsRef,
|
||||
nextVrsBest,
|
||||
likelihoods[0],
|
||||
likelihoods[1],
|
||||
likelihoods[2],
|
||||
likelihoods[3],
|
||||
likelihoods[4],
|
||||
likelihoods[5],
|
||||
likelihoods[6],
|
||||
likelihoods[7],
|
||||
likelihoods[8],
|
||||
likelihoods[9]));
|
||||
posteriors[0],
|
||||
posteriors[1],
|
||||
posteriors[2],
|
||||
posteriors[3],
|
||||
posteriors[4],
|
||||
posteriors[5],
|
||||
posteriors[6],
|
||||
posteriors[7],
|
||||
posteriors[8],
|
||||
posteriors[9]));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ public class GLFReader implements Iterator<GLFRecord> {
|
|||
short rmsMapping = inputBinaryCodec.readUByte();
|
||||
double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length];
|
||||
for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) {
|
||||
lkValues[x] = inputBinaryCodec.readUByte();
|
||||
lkValues[x] = (inputBinaryCodec.readUByte() / GLFRecord.LIKELIHOOD_SCALE_FACTOR + min_lk);
|
||||
}
|
||||
return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
* field values.
|
||||
*/
|
||||
public abstract class GLFRecord {
|
||||
public final static double LIKELIHOOD_SCALE_FACTOR = 1;
|
||||
public final static double LIKELIHOOD_SCALE_FACTOR = 10;
|
||||
|
||||
|
||||
// fields common to all records
|
||||
|
|
@ -247,9 +247,9 @@ public abstract class GLFRecord {
|
|||
if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in");
|
||||
|
||||
double min = vals[0];
|
||||
for (double d : vals) {
|
||||
for (double d : vals)
|
||||
if (d < min) min = d;
|
||||
}
|
||||
|
||||
return GLFRecord.toCappedShort(min);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
|
||||
// get likelihood information if available
|
||||
LikelihoodObject obj;
|
||||
if (locus instanceof GenotypesBacked) {
|
||||
if (locus instanceof LikelihoodsBacked) {
|
||||
obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
} else {
|
||||
double values[] = new double[10];
|
||||
|
|
@ -122,12 +122,14 @@ public class GLFWriter implements GenotypeWriter {
|
|||
}
|
||||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods
|
||||
|
||||
double rms = -1;
|
||||
double rms = 0;
|
||||
int readCount = 0;
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
if (locus instanceof ReadBacked) {
|
||||
rms = calculateRMS(((ReadBacked)locus).getReads());
|
||||
readCount = ((ReadBacked)locus).getReadCount();
|
||||
}
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,((ReadBacked)locus).getReadCount(),obj);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,readCount,obj);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -62,10 +62,15 @@ public class SinglePointCall extends GLFRecord {
|
|||
*/
|
||||
void write(BinaryCodec out) {
|
||||
super.write(out);
|
||||
try {
|
||||
for (double likelihood : likelihoods) {
|
||||
out.writeUByte(GLFRecord.toCappedShort(likelihood));
|
||||
short[] adjusted = new short[likelihoods.length];
|
||||
// we want to scale our values
|
||||
for (int x = 0; x < likelihoods.length; x++) {
|
||||
adjusted[x] = GLFRecord.toCappedShort(LIKELIHOOD_SCALE_FACTOR * (Math.round(likelihoods[x]) - this.minimumLikelihood));
|
||||
}
|
||||
try {
|
||||
for (short value : adjusted) {
|
||||
out.writeUByte(value);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,23 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodsBacked;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -47,51 +58,142 @@ public class GLFWriterTest extends BaseTest {
|
|||
private final String header = "";
|
||||
private final String referenceSequenceName = "chr1";
|
||||
private final int refLength = 1000;
|
||||
File writeTo = new File("testGLF.glf");
|
||||
|
||||
private static final int GENOTYPE_COUNT = 10;
|
||||
private GenotypeWriter rec;
|
||||
protected static final String[] genotypes = {"AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT"};
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
protected final static double SIGNIFICANCE = 5.1;
|
||||
|
||||
@Before
|
||||
public void before() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* make a fake snp
|
||||
*
|
||||
* @param genotype the genotype, 0-15 (AA, AT, AA, ... GG)
|
||||
*/
|
||||
private void addFakeSNP( int genotype, int location ) {
|
||||
LikelihoodObject obj = new LikelihoodObject();
|
||||
obj.setLikelihood(LikelihoodObject.GENOTYPE.values()[genotype], 128);
|
||||
int ran = (int) Math.floor(Math.random() * 4.0);
|
||||
char let = 'A';
|
||||
switch (ran) {
|
||||
case 0:
|
||||
let = 'T';
|
||||
break;
|
||||
case 1:
|
||||
let = 'C';
|
||||
break;
|
||||
case 2:
|
||||
let = 'G';
|
||||
break;
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
/*try {
|
||||
rec.addPointCall(let, location, 10, (short) 10, obj);
|
||||
} catch (IllegalArgumentException e) {
|
||||
e.printStackTrace();
|
||||
} */
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
|
||||
private FakeGenotype createGenotype(int bestGenotype, GenomeLoc location, char ref) {
|
||||
double lk[] = new double[GENOTYPE_COUNT];
|
||||
for (int x = 0; x < GENOTYPE_COUNT; x++) {
|
||||
lk[x] = -15.0 - (double) x; // they'll all be unique like a snowflake
|
||||
}
|
||||
lk[bestGenotype] = -10.0; // lets make the best way better
|
||||
|
||||
return new FakeGenotype(location, genotypes[bestGenotype], ref, SIGNIFICANCE, lk);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void basicWrite() {
|
||||
File writeTo = new File("testGLF.glf");
|
||||
writeTo.deleteOnExit();
|
||||
|
||||
rec = new GLFWriter(header, writeTo);
|
||||
for (int x = 0; x < 100; x++) {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
|
||||
Genotype type = createGenotype(x % 10, loc, 'A');
|
||||
rec.addGenotypeCall(type);
|
||||
}
|
||||
rec.close();
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void basicWrite() {
|
||||
public void basicWriteThenRead() {
|
||||
File writeTo = new File("testGLF2.glf");
|
||||
//writeTo.deleteOnExit();
|
||||
List<FakeGenotype> types = new ArrayList<FakeGenotype>();
|
||||
rec = new GLFWriter(header, writeTo);
|
||||
for (int x = 0; x < 100; x++) {
|
||||
addFakeSNP((int) Math.round(Math.random() * 9), 1);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, x + 1);
|
||||
FakeGenotype type = createGenotype(x % 10, loc, 'A');
|
||||
types.add(type);
|
||||
rec.addGenotypeCall(type);
|
||||
}
|
||||
rec.close();
|
||||
GLFReader reader = new GLFReader(writeTo);
|
||||
int count = 0;
|
||||
while (reader.hasNext()) {
|
||||
GLFRecord rec = reader.next();
|
||||
Assert.assertTrue(types.get(count).compareTo(FakeGenotype.toFakeGenotype((SinglePointCall) rec, reader.getReferenceName(), reader.getCurrentLocation())) == 0);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparable<FakeGenotype> {
|
||||
|
||||
private double[] likelihoods;
|
||||
|
||||
/**
|
||||
* create a basic genotype, given the following fields
|
||||
*
|
||||
* @param location the genomic location
|
||||
* @param genotype the genotype, as a string, where ploidy = string.length
|
||||
* @param ref the reference base as a char
|
||||
* @param negLog10PError the confidence score
|
||||
*/
|
||||
public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) {
|
||||
super(location, genotype, ref, negLog10PError);
|
||||
this.likelihoods = likelihoods;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihood information for this
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public double[] getLikelihoods() {
|
||||
return likelihoods;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(FakeGenotype that) {
|
||||
if (this.getLocation().compareTo(that.getLocation()) != 0) {
|
||||
System.err.println("Location's aren't equal; this = " + this.getLocation() + " that = " + that.getLocation());
|
||||
return this.getLocation().compareTo(that.getLocation());
|
||||
}
|
||||
if (!this.getBases().equals(that.getBases())) {
|
||||
System.err.println("getBases's aren't equal; this = " + this.getBases() + " that = " + that.getBases());
|
||||
return -1;
|
||||
}
|
||||
for (int x = 0; x < this.likelihoods.length; x++) {
|
||||
if (this.likelihoods[x] != that.getLikelihoods()[x]) {
|
||||
System.err.println("likelihoods' aren't equal; this = " + this.likelihoods[x] + " that = " + that.getLikelihoods()[x]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public static FakeGenotype toFakeGenotype(SinglePointCall record, String contig, int postition) {
|
||||
double likelihoods[] = record.getLikelihoods();
|
||||
char ref = record.getRefBase().toChar();
|
||||
double significance = GLFWriterTest.SIGNIFICANCE;
|
||||
int minIndex = 0;
|
||||
for (int i = 0; i < likelihoods.length; i++) {
|
||||
if (likelihoods[i] < likelihoods[minIndex]) minIndex = i;
|
||||
}
|
||||
for (int i = 0; i < likelihoods.length; i++) {
|
||||
likelihoods[i] = likelihoods[i] * -1;
|
||||
}
|
||||
|
||||
String genotype = GLFWriterTest.genotypes[minIndex];
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(contig, postition);
|
||||
return new FakeGenotype(loc, genotype, ref, significance, likelihoods);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue