GLF reader and writer check in.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1202 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-08 23:06:37 +00:00
parent c8fcecbc6f
commit 8ee5c7de8e
9 changed files with 314 additions and 178 deletions

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
@ -35,9 +34,9 @@ import java.io.File;
/** /**
* @author aaron * @author aaron
* @version 1.0 * @version 1.0
* * <p/>
* Class GeliAdapter * Class GeliAdapter
* Adapts the Geli file writer to the Genotype writer interface * Adapts the Geli file writer to the Genotype writer interface
*/ */
public class GeliAdapter implements GenotypeWriter { public class GeliAdapter implements GenotypeWriter {
@ -47,30 +46,41 @@ public class GeliAdapter implements GenotypeWriter {
/** /**
* wrap a GeliFileWriter in the Genotype writer interface * wrap a GeliFileWriter in the Genotype writer interface
* @param writeTo where to write to *
* @param writeTo where to write to
* @param fileHeader the file header to write out * @param fileHeader the file header to write out
*/ */
public GeliAdapter( File writeTo, final SAMFileHeader fileHeader ) { public GeliAdapter(File writeTo, final SAMFileHeader fileHeader) {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo,fileHeader); this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
} }
/** /**
* add a single point genotype call to the * add a single point genotype call to the
* *
* @param contigName the name of the contig you're calling in
* @param contigLength the contig length
* @param position the position on the contig * @param position the position on the contig
* @param referenceBase the reference base * @param referenceBase the reference base
* @param readDepth the read depth at the specified position * @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles * @param likelihoods the likelihoods of each of the possible alleles
*/ */
@Override @Override
public void addGenotypeCall( int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods ) { public void addGenotypeCall(String contigName,
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte)referenceBase)); int contigLength,
int position,
float rmsMapQuals,
char referenceBase,
int readDepth,
LikelihoodObject likelihoods) {
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase));
} }
/** /**
* add a variable length call to the genotyper * add a variable length call to the genotyper
* *
* @param contigName the name of the contig you're calling in
* @param contigLength the contig length
* @param position the position on the genome * @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities * @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth * @param readDepth the read depth
@ -80,7 +90,7 @@ public class GeliAdapter implements GenotypeWriter {
* @param hetLikelihood the heterozygous likelihood * @param hetLikelihood the heterozygous likelihood
*/ */
@Override @Override
public void addVariableLengthCall( int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood ) { public void addVariableLengthCall(String contigName, int contigLength, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
throw new UnsupportedOperationException("Geli format does not support variable length allele calls"); throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
} }
@ -91,7 +101,7 @@ public class GeliAdapter implements GenotypeWriter {
* @param readDepth * @param readDepth
*/ */
@Override @Override
public void addNoCall( int position, int readDepth ) { public void addNoCall(int position, int readDepth) {
throw new UnsupportedOperationException("Geli format does not support no-calls"); throw new UnsupportedOperationException("Geli format does not support no-calls");
} }

View File

@ -1,8 +1,5 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
* *
@ -29,22 +26,26 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
*/ */
/** /**
* * @author aaron
* @author aaron * <p/>
* * Class Genotype
* Class Genotype * <p/>
* * The interface for storing genotype calls.
* The interface for storing genotype calls.
*/ */
public interface GenotypeWriter { public interface GenotypeWriter {
/** /**
* add a single point genotype call to the * add a single point genotype call to the
* @param position the position on the contig *
* @param contigName the name of the contig you're calling in
* @param contigLength the contig length
* @param position the position on the contig
* @param referenceBase the reference base * @param referenceBase the reference base
* @param readDepth the read depth at the specified position * @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles * @param likelihoods the likelihoods of each of the possible alleles
*/ */
public void addGenotypeCall(int position, public void addGenotypeCall(String contigName,
int contigLength,
int position,
float rmsMapQuals, float rmsMapQuals,
char referenceBase, char referenceBase,
int readDepth, int readDepth,
@ -52,15 +53,20 @@ public interface GenotypeWriter {
/** /**
* add a variable length call to the genotyper * add a variable length call to the genotyper
* @param position the position on the genome *
* @param rmsMapQuals the root mean square of the mapping qualities * @param contigName the name of the contig you're calling in
* @param readDepth the read depth * @param contigLength the contig length
* @param refBase the reference base * @param position the position on the genome
* @param firstHomZyg the first homozygous indel * @param rmsMapQuals the root mean square of the mapping qualities
* @param secondHomZyg the second homozygous indel (if present, null if not) * @param readDepth the read depth
* @param refBase the reference base
* @param firstHomZyg the first homozygous indel
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood * @param hetLikelihood the heterozygous likelihood
*/ */
public void addVariableLengthCall(int position, public void addVariableLengthCall(String contigName,
int contigLength,
int position,
float rmsMapQuals, float rmsMapQuals,
int readDepth, int readDepth,
char refBase, char refBase,
@ -70,15 +76,14 @@ public interface GenotypeWriter {
/** /**
* add a no call to the genotype file, if supported. * add a no call to the genotype file, if supported.
*
* @param position * @param position
* @param readDepth * @param readDepth
*/ */
public void addNoCall(int position, public void addNoCall(int position,
int readDepth); int readDepth);
/** /** finish writing, closing any open files. */
* finish writing, closing any open files.
*/
public void close(); public void close();
} }

View File

@ -1,15 +1,15 @@
package org.broadinstitute.sting.utils.genotype.glf; package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream; import net.sf.samtools.util.BlockCompressedInputStream;
import java.io.File;
import java.io.DataOutputStream;
import java.util.Iterator;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import java.io.DataInputStream;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
* *
@ -35,9 +35,7 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
/** /** an object for reading in GLF files */
* an object for reading in GLF files
*/
public class GLFReader implements Iterator<GLFRecord> { public class GLFReader implements Iterator<GLFRecord> {
// our next record // our next record
@ -58,62 +56,87 @@ public class GLFReader implements Iterator<GLFRecord> {
// reference length // reference length
private int referenceLength; private int referenceLength;
GLFReader( File readFrom ) { /**
inputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(readFrom))); * create a glf reader
*
* @param readFrom the file to read from
*/
GLFReader(File readFrom) {
try {
inputBinaryCodec = new BinaryCodec(new DataInputStream(new BlockCompressedInputStream(readFrom)));
} catch (IOException e) {
throw new StingException("Unable to open " + readFrom.getName(), e);
}
inputBinaryCodec.setInputFileName(readFrom.getName()); inputBinaryCodec.setInputFileName(readFrom.getName());
// first verify that it's a valid GLF // first verify that it's a valid GLF
for (short s: glfMagic) { for (short s : glfMagic) {
if (inputBinaryCodec.readUByte() != s) throw new StingException("Verification of GLF format failed: magic string doesn't match)"); if (inputBinaryCodec.readUByte() != s)
throw new StingException("Verification of GLF format failed: magic string doesn't match)");
} }
// get the header string // get the header string
headerStr = inputBinaryCodec.readLengthAndString(false); headerStr = inputBinaryCodec.readLengthAndString(false);
// get the reference name if (advanceContig()) {
referenceName = inputBinaryCodec.readLengthAndString(true); // setup the next record
next();
}
// get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it.
referenceLength = (int)inputBinaryCodec.readUInt();
// get the next record
nextRecord = next();
} }
/**
* read in a single point call
*
* @param refBase the reference base
* @param inputBinaryCodec the binary codec
*
* @return a single point call object
*/
private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) { private SinglePointCall generateSPC(char refBase, BinaryCodec inputBinaryCodec) {
int offset = (int)inputBinaryCodec.readUInt(); int offset = (int) inputBinaryCodec.readUInt();
long depth = inputBinaryCodec.readUInt(); long depth = inputBinaryCodec.readUInt();
short min_lk = (short)((depth & 0x00000000ff000000) >> 24); short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
int readDepth = (int)(depth & 0x0000000000ffffff); int readDepth = (int) (depth & 0x0000000000ffffff);
short rmsMapping = inputBinaryCodec.readUByte(); short rmsMapping = inputBinaryCodec.readUByte();
double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length]; double[] lkValues = new double[LikelihoodObject.GENOTYPE.values().length];
for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) { for (int x = 0; x < LikelihoodObject.GENOTYPE.values().length; x++) {
lkValues[x] = inputBinaryCodec.readUByte(); lkValues[x] = inputBinaryCodec.readUByte();
} }
return new SinglePointCall(refBase,offset,readDepth,rmsMapping,lkValues); return new SinglePointCall(refBase, offset, readDepth, rmsMapping, lkValues);
} }
/**
* read in a variable length call, and generate a VLC object from the data
*
* @param refBase the reference base
* @param inputBinaryCodec the input codex
*
* @return a VariableLengthCall object
*/
private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) { private VariableLengthCall generateVLC(char refBase, BinaryCodec inputBinaryCodec) {
int offset = (int)inputBinaryCodec.readUInt(); int offset = (int) inputBinaryCodec.readUInt();
int depth = (int)inputBinaryCodec.readUInt(); int depth = (int) inputBinaryCodec.readUInt();
short min_lk = (short)((depth & 0x00000000ff000000) >> 24); short min_lk = (short) ((depth & 0x00000000ff000000) >> 24);
int readDepth = (depth & 0x0000000000ffffff); int readDepth = (depth & 0x0000000000ffffff);
short rmsMapping = inputBinaryCodec.readUByte(); short rmsMapping = inputBinaryCodec.readUByte();
short lkHom1 = inputBinaryCodec.readUByte(); short lkHom1 = inputBinaryCodec.readUByte();
short lkHom2 = inputBinaryCodec.readUByte(); short lkHom2 = inputBinaryCodec.readUByte();
short lkHet = inputBinaryCodec.readUByte(); short lkHet = inputBinaryCodec.readUByte();
int indelLen1 = (int)inputBinaryCodec.readShort(); int indelLen1 = (int) inputBinaryCodec.readShort();
int indelLen2 = (int)inputBinaryCodec.readShort(); int indelLen2 = (int) inputBinaryCodec.readShort();
short[] indelSeq1 = new short[indelLen1];
short[] indelSeq2 = new short[indelLen2]; int readCnt = Math.abs(indelLen1);
for (int x = 0; x < indelLen1; x++) { short indelSeq1[] = new short[readCnt];
for (int x = 0; x < readCnt; x++) {
indelSeq1[x] = inputBinaryCodec.readUByte(); indelSeq1[x] = inputBinaryCodec.readUByte();
} }
for (int x = 0; x < indelLen2; x++) { readCnt = Math.abs(indelLen2);
indelSeq2[x] = inputBinaryCodec.readUByte(); short indelSeq2[] = new short[readCnt];
for (int x = 0; x < readCnt; x++) {
indelSeq2[x] = inputBinaryCodec.readUByte();
} }
return new VariableLengthCall(refBase,offset,readDepth,rmsMapping,lkHom1,lkHom2,lkHet,indelSeq1,indelSeq2); return new VariableLengthCall(refBase, offset, readDepth, rmsMapping, lkHom1, lkHom2, lkHet, indelLen1, indelSeq1, indelLen2, indelSeq2);
} }
@Override @Override
@ -124,24 +147,69 @@ public class GLFReader implements Iterator<GLFRecord> {
@Override @Override
public GLFRecord next() { public GLFRecord next() {
short firstBase = inputBinaryCodec.readUByte(); short firstBase = inputBinaryCodec.readUByte();
byte recordType = (byte)(firstBase & 0x00f0 >> 4); byte recordType = (byte) ((firstBase & 0x0f0) >> 4);
char refBase = (char)(firstBase & 0x000f); char refBase = (char) (firstBase & 0x000f);
GLFRecord ret = nextRecord; GLFRecord ret = nextRecord;
if (recordType == 1) { if (recordType == 1) {
nextRecord = generateSPC(refBase, inputBinaryCodec); nextRecord = generateSPC(refBase, inputBinaryCodec);
} } else if (recordType == 2) {
else if (recordType == 2) { nextRecord = generateVLC(refBase, inputBinaryCodec);
nextRecord = generateVLC(refBase, inputBinaryCodec); } else if (recordType == 0) {
} if (advanceContig()) {
else if (recordType == 0){ return next();
}
nextRecord = null; nextRecord = null;
} else {
throw new StingException("Unkonwn GLF record type (type = " + recordType + ")");
} }
return ret; return ret;
} }
/**
* advance to the next contig
*
* @return true if we could advance
*/
private boolean advanceContig() {
// try to read the next sequence record
try {
// get the reference name
referenceName = inputBinaryCodec.readLengthAndString(true);
// get the reference length - this may be a problem storing an unsigned int into a signed int. but screw it.
referenceLength = (int) inputBinaryCodec.readUInt();
//System.err.println(referenceName.length());
return true;
} catch (RuntimeException e) {
e.printStackTrace();
// we're out of file space, set the next to null
nextRecord = null;
}
return false;
}
@Override @Override
public void remove() { public void remove() {
throw new StingException("I'm Sorry Dave, I can't let you do that (also GLFReader doesn't support remove())."); throw new StingException("GLFReader doesn't support remove()");
}
/**
* getter methods
*/
public String getReferenceName() {
return referenceName;
}
public int getReferenceLength() {
return referenceLength;
}
public String getHeaderStr() {
return headerStr;
} }
} }

View File

@ -74,9 +74,10 @@ abstract class GLFRecord {
/** /**
* private constructor, used by the enum class to makes each enum value * private constructor, used by the enum class to makes each enum value
*
* @param value the short values specified in the enum listing * @param value the short values specified in the enum listing
*/ */
REF_BASE( short value ) { REF_BASE(short value) {
fieldValue = value; fieldValue = value;
} }
@ -88,7 +89,11 @@ abstract class GLFRecord {
* @return the corresponding REF_BASE * @return the corresponding REF_BASE
* @throws IllegalArgumentException if the value passed can't be converted * @throws IllegalArgumentException if the value passed can't be converted
*/ */
public static REF_BASE toBase( char value ) { public static REF_BASE toBase(char value) {
// for the case where they're passing in the enumated value
if (value <= 0x0F && value >= 0) {
return REF_BASE.values()[value];
}
String str = String.valueOf(value).toUpperCase(); String str = String.valueOf(value).toUpperCase();
for (int x = 0; x < REF_BASE.values().length; x++) { for (int x = 0; x < REF_BASE.values().length; x++) {
if (REF_BASE.values()[x].toString().equals(str)) { if (REF_BASE.values()[x].toString().equals(str)) {
@ -111,7 +116,7 @@ abstract class GLFRecord {
private final short fieldValue; // in kilograms private final short fieldValue; // in kilograms
RECORD_TYPE( short value ) { RECORD_TYPE(short value) {
fieldValue = value; fieldValue = value;
} }
@ -130,7 +135,7 @@ abstract class GLFRecord {
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
public GLFRecord( char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { public GLFRecord(char base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
REF_BASE newBase = REF_BASE.toBase(base); REF_BASE newBase = REF_BASE.toBase(base);
validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ); validateInput(newBase, offset, minimumLikelihood, readDepth, rmsMapQ);
} }
@ -144,7 +149,7 @@ abstract class GLFRecord {
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
GLFRecord( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { GLFRecord(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ); validateInput(base, offset, minimumLikelihood, readDepth, rmsMapQ);
} }
@ -157,7 +162,7 @@ abstract class GLFRecord {
* @param readDepth the read depth at this position * @param readDepth the read depth at this position
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
*/ */
private void validateInput( REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ ) { private void validateInput(REF_BASE base, long offset, short minimumLikelihood, int readDepth, short rmsMapQ) {
this.refBase = base; this.refBase = base;
if (offset > 4294967295L || offset < 0) { if (offset > 4294967295L || offset < 0) {
throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset); throw new IllegalArgumentException("Offset is out of bounds (0 to 0xffffffff) value passed = " + offset);
@ -185,9 +190,10 @@ abstract class GLFRecord {
* *
* @param out the binary codec to write to * @param out the binary codec to write to
*/ */
void write( BinaryCodec out ) { void write(BinaryCodec out) {
out.writeUByte((short) ( this.getRecordType().getReadTypeValue() << 4 | ( refBase.getBaseHexValue() & 0x0f ) )); short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUInt(( (Long) offset ).intValue()); out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
out.writeUInt(((Long) offset).intValue());
out.writeUInt((new Long(readDepth).intValue())); out.writeUInt((new Long(readDepth).intValue()));
out.writeUByte((short) rmsMapQ); out.writeUByte((short) rmsMapQ);
} }
@ -210,23 +216,27 @@ abstract class GLFRecord {
/** /**
* convert a double to a byte, capping it at the maximum value of 255 * convert a double to a byte, capping it at the maximum value of 255
*
* @param d a double value * @param d a double value
* @return a byte, capped at *
* @return a byte, capped at
*/ */
protected static short toCappedShort(double d) { protected static short toCappedShort(double d) {
return (d > 255.0) ? (byte)255 : (byte)Math.round(d); return (d > 255.0) ? (short) 255 : (short) Math.round(d);
} }
/** /**
* find the minimum value in a set of doubles * find the minimum value in a set of doubles
*
* @param vals * @param vals
*
* @return * @return
*/ */
protected static double findMin(double vals[]) { protected static double findMin(double vals[]) {
if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in"); if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in");
double min = vals[0]; double min = vals[0];
for (double d: vals) { for (double d : vals) {
if (d < min) min = d; if (d < min) min = d;
} }
return min; return min;

View File

@ -2,13 +2,12 @@ package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream; import net.sf.samtools.util.BlockCompressedOutputStream;
import java.io.File;
import java.io.DataOutputStream;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import java.io.DataOutputStream;
import java.io.File;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
* *
@ -51,19 +50,17 @@ public class GLFWriter implements GenotypeWriter {
// our header text, reference sequence name (i.e. chr1), and it's length // our header text, reference sequence name (i.e. chr1), and it's length
private String headerText = ""; private String headerText = "";
private String referenceSequenceName = ""; private String referenceSequenceName = null;
private long referenceSequenceLength = 0; private long referenceSequenceLength = 0;
/** /**
* The public constructor for creating a GLF object * The public constructor for creating a GLF object
* *
* @param headerText the header text (currently unclear what the contents are) * @param headerText the header text (currently unclear what the contents are)
* @param referenceSequenceName the reference sequence name, i.e. "chr1", "chr2", etc * @param writeTo the location to write to
*/ */
public GLFWriter( String headerText, String referenceSequenceName, int referenceSequenceLength, File writeTo ) { public GLFWriter(String headerText, File writeTo) {
this.headerText = headerText; this.headerText = headerText;
this.referenceSequenceName = referenceSequenceName;
this.referenceSequenceLength = referenceSequenceLength;
outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo))); outputBinaryCodec = new BinaryCodec(new DataOutputStream(new BlockCompressedOutputStream(writeTo)));
outputBinaryCodec.setOutputFileName(writeTo.toString()); outputBinaryCodec.setOutputFileName(writeTo.toString());
this.writeHeader(); this.writeHeader();
@ -72,21 +69,28 @@ public class GLFWriter implements GenotypeWriter {
/** /**
* add a point genotype to the GLF writer * add a point genotype to the GLF writer
* *
* @param refBase the reference base, as a char * @param contigName the name of the contig you're calling in
* @param genomicLoc the location, as an offset from the previous glf record * @param contigLength the contig length
* @param readDepth the read depth at the specified postion * @param refBase the reference base, as a char
* @param rmsMapQ the root mean square of the mapping quality * @param genomicLoc the location, as an offset from the previous glf record
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods * @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
*/ */
@Override @Override
public void addGenotypeCall( int genomicLoc, public void addGenotypeCall(String contigName,
float rmsMapQ, int contigLength,
char refBase, int genomicLoc,
int readDepth, float rmsMapQ,
LikelihoodObject lhValues ) { char refBase,
int readDepth,
LikelihoodObject lhValues) {
// check if we've jumped to a new contig
checkSequence(contigName, contigLength);
SinglePointCall call = new SinglePointCall(refBase, genomicLoc, SinglePointCall call = new SinglePointCall(refBase,
genomicLoc,
readDepth, readDepth,
(short) rmsMapQ, (short) rmsMapQ,
lhValues.toDoubleArray()); lhValues.toDoubleArray());
@ -96,6 +100,8 @@ public class GLFWriter implements GenotypeWriter {
/** /**
* add a variable length (indel, deletion, etc) to the genotype writer * add a variable length (indel, deletion, etc) to the genotype writer
* *
* @param contigName the name of the contig you're calling in
* @param contigLength the contig length
* @param refBase the reference base * @param refBase the reference base
* @param genomicLoc the location, as an offset from the previous glf record * @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion * @param readDepth the read depth at the specified postion
@ -105,23 +111,18 @@ public class GLFWriter implements GenotypeWriter {
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255 * @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
*/ */
@Override @Override
public void addVariableLengthCall( int genomicLoc, public void addVariableLengthCall(String contigName,
float rmsMapQ, int contigLength,
int readDepth, int genomicLoc,
char refBase, float rmsMapQ,
IndelLikelihood firstHomZyg, int readDepth,
IndelLikelihood secondHomZyg, char refBase,
byte hetLikelihood ) { IndelLikelihood firstHomZyg,
IndelLikelihood secondHomZyg,
byte hetLikelihood) {
// in this context, the minumum likelihood is lowest of the three options // check if we've jumped to a new contig
double lowestLikelihood = Double.MAX_VALUE; checkSequence(contigName, contigLength);
if (firstHomZyg.getLikelihood() < lowestLikelihood) {
lowestLikelihood = firstHomZyg.getLikelihood();
} else if (secondHomZyg.getLikelihood() < lowestLikelihood) {
lowestLikelihood = secondHomZyg.getLikelihood();
} else if (hetLikelihood < lowestLikelihood) {
lowestLikelihood = hetLikelihood;
}
// normalize the two // normalize the two
VariableLengthCall call = new VariableLengthCall(refBase, VariableLengthCall call = new VariableLengthCall(refBase,
@ -131,11 +132,12 @@ public class GLFWriter implements GenotypeWriter {
firstHomZyg.getLikelihood(), firstHomZyg.getLikelihood(),
secondHomZyg.getLikelihood(), secondHomZyg.getLikelihood(),
hetLikelihood, hetLikelihood,
firstHomZyg.getLengthOfIndel(),
firstHomZyg.getIndelSequence(), firstHomZyg.getIndelSequence(),
secondHomZyg.getLengthOfIndel(),
secondHomZyg.getIndelSequence()); secondHomZyg.getIndelSequence());
call.write(this.outputBinaryCodec); call.write(this.outputBinaryCodec);
} }
/** /**
@ -145,7 +147,7 @@ public class GLFWriter implements GenotypeWriter {
* @param readDepth the read depth * @param readDepth the read depth
*/ */
@Override @Override
public void addNoCall( int position, int readDepth ) { public void addNoCall(int position, int readDepth) {
// glf doesn't support this operation // glf doesn't support this operation
throw new UnsupportedOperationException("GLF doesn't support a 'no call' call."); throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
} }
@ -155,7 +157,8 @@ public class GLFWriter implements GenotypeWriter {
* *
* @param rec the GLF record to write. * @param rec the GLF record to write.
*/ */
public void addGLFRecord( GLFRecord rec ) { public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
checkSequence(contigName,contigLength);
rec.write(this.outputBinaryCodec); rec.write(this.outputBinaryCodec);
} }
@ -167,24 +170,52 @@ public class GLFWriter implements GenotypeWriter {
*/ */
private void writeHeader() { private void writeHeader() {
for (int x = 0; x < glfMagic.length; x++) { for (int x = 0; x < glfMagic.length; x++) {
outputBinaryCodec.writeByte(glfMagic[x]); outputBinaryCodec.writeUByte(glfMagic[x]);
} }
if (!( headerText.equals("") )) { if (!(headerText.equals(""))) {
outputBinaryCodec.writeString(headerText, true, true); outputBinaryCodec.writeString(headerText, true, true);
} else { } else {
outputBinaryCodec.writeInt(0); outputBinaryCodec.writeInt(0);
} }
}
/**
* check to see if we've jumped to a new contig
*
* @param sequenceName
* @param seqLength
*/
private void checkSequence(String sequenceName, int seqLength) {
if ((referenceSequenceName == null) || (!referenceSequenceName.equals(sequenceName))) {
if (this.referenceSequenceName != null) { // don't write the record the first time
this.writeEndRecord();
}
referenceSequenceName = sequenceName;
referenceSequenceLength = seqLength;
addSequence();
}
}
/** add a sequence definition to the glf */
private void addSequence() {
outputBinaryCodec.writeString(referenceSequenceName, true, true); outputBinaryCodec.writeString(referenceSequenceName, true, true);
outputBinaryCodec.writeUInt(referenceSequenceLength); outputBinaryCodec.writeUInt(referenceSequenceLength);
} }
/** write end record */
private void writeEndRecord() {
outputBinaryCodec.writeUByte((short) 0);
}
/** /**
* close the file. You must close the file to ensure any remaining data gets * close the file. You must close the file to ensure any remaining data gets
* written out. * written out.
*/ */
@Override @Override
public void close() { public void close() {
outputBinaryCodec.writeByte((byte) 0); writeEndRecord();
outputBinaryCodec.close(); outputBinaryCodec.close();
} }
@ -195,21 +226,31 @@ public class GLFWriter implements GenotypeWriter {
* *
* @return a byte array containing the normalized values * @return a byte array containing the normalized values
*/ */
private byte[] normalizeToByte( double[] values ) { private byte[] normalizeToByte(double[] values) {
byte ret[] = new byte[values.length]; byte ret[] = new byte[values.length];
double min = Double.MAX_VALUE; double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE; double max = Double.MIN_VALUE;
for (double d : values) { for (double d : values) {
min = ( d < min ) ? d : min; min = (d < min) ? d : min;
max = ( d > max ) ? d : max; max = (d > max) ? d : max;
} }
double scale = max / 255.0; double scale = max / 255.0;
for (int x = 0; x < values.length; x++) { for (int x = 0; x < values.length; x++) {
ret[x] = (byte) ( ( values[x] - min ) / scale ); ret[x] = (byte) ((values[x] - min) / scale);
} }
return ret; return ret;
} }
/**
* get the reference sequence
*
* @return
*/
public String getReferenceSequenceName() {
return referenceSequenceName;
}
} }

View File

@ -43,15 +43,14 @@ class SinglePointCall extends GLFRecord {
/** /**
* create a single * create a single
* *
* @param refBase the reference base, as a char * @param refBase the reference base, as a char
* @param offset the location, as an offset from the previous glf record * @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion * @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
* @param likelihoods the Likelihoods * @param likelihoods the Likelihoods
*/ */
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[] ) { SinglePointCall(char refBase, int offset, int readDepth, short rmsMapQ, double likelihoods[]) {
super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ); super(refBase, offset, (short) GLFRecord.findMin(likelihoods), readDepth, rmsMapQ);
this.likelihoods = likelihoods; this.likelihoods = likelihoods;
} }
@ -61,11 +60,15 @@ class SinglePointCall extends GLFRecord {
* *
* @param out the codec to write to * @param out the codec to write to
*/ */
void write( BinaryCodec out ) { void write(BinaryCodec out) {
super.write(out); super.write(out);
try {
for (double likelihood : likelihoods) { for (double likelihood : likelihoods) {
out.writeUByte(GLFRecord.toCappedShort(likelihood)); out.writeUByte(GLFRecord.toCappedShort(likelihood));
} }
} catch (Exception e) {
e.printStackTrace();
}
} }
/** /**

View File

@ -53,31 +53,33 @@ class VariableLengthCall extends GLFRecord {
/** /**
* the default constructor * the default constructor
* *
* @param refBase the reference base * @param refBase the reference base
* @param offset the location, as an offset from the previous glf record * @param offset the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion * @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality * @param rmsMapQ the root mean square of the mapping quality
* @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 * @param lkHom1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 * @param lkHom2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255 * @param lkHet the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelSeq1 the sequence for the first indel allele * @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele * @param indelSeq2 the sequence for the second indel allele
*/ */
VariableLengthCall( char refBase, VariableLengthCall(char refBase,
long offset, long offset,
int readDepth, int readDepth,
short rmsMapQ, short rmsMapQ,
double lkHom1, double lkHom1,
double lkHom2, double lkHom2,
double lkHet, double lkHet,
final short indelSeq1[], int indelOneLength,
final short indelSeq2[] ) { final short indelSeq1[],
super(refBase, offset, GLFRecord.toCappedShort(findMin(new double []{lkHom1,lkHom2,lkHet})), readDepth, rmsMapQ); int indelTwoLength,
final short indelSeq2[]) {
super(refBase, offset, GLFRecord.toCappedShort(findMin(new double[]{lkHom1, lkHom2, lkHet})), readDepth, rmsMapQ);
this.lkHom1 = GLFRecord.toCappedShort(lkHom1); this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
this.lkHom2 = GLFRecord.toCappedShort(lkHom2); this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
this.lkHet = GLFRecord.toCappedShort(lkHet); this.lkHet = GLFRecord.toCappedShort(lkHet);
this.indelLen1 = indelSeq1.length; this.indelLen1 = indelOneLength;
this.indelLen2 = indelSeq2.length; this.indelLen2 = indelTwoLength;
this.indelSeq1 = indelSeq1; this.indelSeq1 = indelSeq1;
this.indelSeq2 = indelSeq2; this.indelSeq2 = indelSeq2;
size = 16 + indelSeq1.length + indelSeq2.length; size = 16 + indelSeq1.length + indelSeq2.length;
@ -89,7 +91,7 @@ class VariableLengthCall extends GLFRecord {
* *
* @param out the binary codec to write to * @param out the binary codec to write to
*/ */
void write( BinaryCodec out ) { void write(BinaryCodec out) {
super.write(out); super.write(out);
out.writeByte(lkHom1); out.writeByte(lkHom1);
out.writeByte(lkHom2); out.writeByte(lkHom2);
@ -109,7 +111,7 @@ class VariableLengthCall extends GLFRecord {
return RECORD_TYPE.VARIABLE; return RECORD_TYPE.VARIABLE;
} }
/** @return the size of the record, which is the size of our fields plus the generic records fields */ /** @return the size of the record, which is the size of our fields plus the generic records fields */
public int getByteSize() { public int getByteSize() {
return size + super.getByteSize(); return size + super.getByteSize();
} }

View File

@ -1,14 +1,12 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype;
import org.junit.Test; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.junit.Test;
import java.io.File; import java.io.File;
import net.sf.samtools.SAMFileHeader;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -58,7 +56,7 @@ public class GeliAdapterTest extends BaseTest {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
adapter = new GeliAdapter(fl,header); adapter = new GeliAdapter(fl,header);
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods()); LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods());
adapter.addGenotypeCall(100,100,'A',100,obj); adapter.addGenotypeCall("chr1",10,100,100,'A',100,obj);
adapter.close(); adapter.close();
} }

View File

@ -1,11 +1,10 @@
package org.broadinstitute.sting.utils.genotype.glf; package org.broadinstitute.sting.utils.genotype.glf;
import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.junit.Before;
import org.junit.Test;
import java.io.File; import java.io.File;
@ -88,7 +87,7 @@ public class GLFWriterTest extends BaseTest {
@Test @Test
public void basicWrite() { public void basicWrite() {
rec = new GLFWriter(header, referenceSequenceName, refLength, writeTo); rec = new GLFWriter(header, writeTo);
for (int x = 0; x < 100; x++) { for (int x = 0; x < 100; x++) {
addFakeSNP((int) Math.round(Math.random() * 9), 1); addFakeSNP((int) Math.round(Math.random() * 9), 1);
} }