diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java
index 8026b1041..771fe5117 100755
--- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java
@@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
public class rodVariants extends BasicReferenceOrderedDatum {
private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java
new file mode 100644
index 000000000..282498f2e
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java
@@ -0,0 +1,105 @@
+package org.broadinstitute.sting.utils.genotype;
+
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
+import net.sf.samtools.SAMFileHeader;
+
+import java.io.File;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ * @version 1.0
+ *
+ * Class GeliAdapter
+ * Adapts the Geli file writer to the Genotype writer interface
+ */
+public class GeliAdapter implements GenotypeWriter {
+
+
+ // the geli file writer we're adapting
+ private final GeliFileWriter writer;
+
+ /**
+ * wrap a GeliFileWriter in the Genotype writer interface
+ * @param writeTo where to write to
+ * @param fileHeader the file header to write out
+ */
+ public GeliAdapter( File writeTo, final SAMFileHeader fileHeader ) {
+ this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo,fileHeader);
+ }
+
+
+ /**
+ * add a single point genotype call to the
+ *
+ * @param position the position on the contig
+ * @param referenceBase the reference base
+ * @param readDepth the read depth at the specified position
+ * @param likelihoods the likelihoods of each of the possible alleles
+ */
+ @Override
+ public void addGenotypeCall( int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods ) {
+ writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte)referenceBase));
+ }
+
+ /**
+ * add a variable length call to the genotyper
+ *
+ * @param position the position on the genome
+ * @param rmsMapQuals the root mean square of the mapping qualities
+ * @param readDepth the read depth
+ * @param refBase the reference base
+ * @param firstHomZyg the first homozygous indel
+ * @param secondHomZyg the second homozygous indel (if present, null if not)
+ * @param hetLikelihood the heterozygous likelihood
+ */
+ @Override
+ public void addVariableLengthCall( int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood ) {
+ throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
+ }
+
+ /**
+ * add a no call to the genotype file, if supported.
+ *
+ * @param position
+ * @param readDepth
+ */
+ @Override
+ public void addNoCall( int position, int readDepth ) {
+ throw new UnsupportedOperationException("Geli format does not support no-calls");
+ }
+
+ /** finish writing, closing any open files. */
+ @Override
+ public void close() {
+ if (this.writer != null) {
+ this.writer.close();
+ }
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
new file mode 100644
index 000000000..64b0af488
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
@@ -0,0 +1,84 @@
+package org.broadinstitute.sting.utils.genotype;
+
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ *
+ * @author aaron
+ *
+ * Class Genotype
+ *
+ * The interface for storing genotype calls.
+ */
+public interface GenotypeWriter {
+ /**
+ * add a single point genotype call to the
+ * @param position the position on the contig
+ * @param referenceBase the reference base
+ * @param readDepth the read depth at the specified position
+ * @param likelihoods the likelihoods of each of the possible alleles
+ */
+ public void addGenotypeCall(int position,
+ float rmsMapQuals,
+ char referenceBase,
+ int readDepth,
+ LikelihoodObject likelihoods);
+
+ /**
+ * add a variable length call to the genotyper
+ * @param position the position on the genome
+ * @param rmsMapQuals the root mean square of the mapping qualities
+ * @param readDepth the read depth
+ * @param refBase the reference base
+ * @param firstHomZyg the first homozygous indel
+ * @param secondHomZyg the second homozygous indel (if present, null if not)
+ * @param hetLikelihood the heterozygous likelihood
+ */
+ public void addVariableLengthCall(int position,
+ float rmsMapQuals,
+ int readDepth,
+ char refBase,
+ IndelLikelihood firstHomZyg,
+ IndelLikelihood secondHomZyg,
+ byte hetLikelihood);
+
+ /**
+ * add a no call to the genotype file, if supported.
+ * @param position
+ * @param readDepth
+ */
+ public void addNoCall(int position,
+ int readDepth);
+
+ /**
+ * finish writing, closing any open files.
+ */
+ public void close();
+
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/IndelLikelihood.java b/java/src/org/broadinstitute/sting/utils/genotype/IndelLikelihood.java
new file mode 100644
index 000000000..1800f023c
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/IndelLikelihood.java
@@ -0,0 +1,73 @@
+package org.broadinstitute.sting.utils.genotype;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class IndelLikelihood
+ *
+ * The representation of an indel allele.
+ */
+public class IndelLikelihood {
+
+ protected double loglikelihood;
+ protected short lengthOfIndel;
+ protected short[] indelSequence;
+
+ /**
+ * Create a likelihood object for an indel call
+ *
+ * @param likelihood the likelihood (represented as a negitive log likelihood,
+ * with a ceiling of 255.
+ * @param indelSequence the indel sequence, not null terminated
+ */
+ public IndelLikelihood( byte likelihood, String indelSequence ) {
+ this.loglikelihood = likelihood;
+ this.lengthOfIndel = (short)indelSequence.length();
+ this.indelSequence = new short[indelSequence.length()];
+ for (int tmp = 0; tmp < indelSequence.length(); tmp++) {
+ this.indelSequence[tmp] = (short)indelSequence.charAt(tmp);
+ }
+ }
+
+ /**
+ * getter methods
+ */
+
+ public double getLikelihood() {
+ return loglikelihood;
+ }
+
+ public short getLengthOfIndel() {
+ return lengthOfIndel;
+ }
+
+ public short[] getIndelSequence() {
+ return indelSequence;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java
new file mode 100755
index 000000000..d41688cc2
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java
@@ -0,0 +1,237 @@
+package org.broadinstitute.sting.utils.genotype;
+
+import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
+import edu.mit.broad.picard.genotype.DiploidGenotype;
+
+import java.util.HashMap;
+
+import net.sf.samtools.SAMFileHeader;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *
+ * Class LikelyhoodObject
+ *
+ * An object used to store likelyhood information for genotypes. Genotype
+ * likelihoods are assumed to be infinite (negitive log likelihood), unless set.
+ * This allows the consumer to make an empty LikelihoodObject, and just set
+ * those values which have associated likelihood values.
+ *
+ */
+public class LikelihoodObject {
+
+ // our possible genotypes, in order according to GLFv3
+ public enum GENOTYPE {
+ AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
+ }
+
+ // possible types of likihoods to store
+ public enum LIKELIHOOD_TYPE {
+ NEGITIVE_LOG, LOG, RAW;
+ }
+
+ // our liklihood storage type
+ protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG;
+
+ // default the bestGenotype likelihood to the allele AA
+ protected GENOTYPE bestGenotype = GENOTYPE.AA;
+
+ // how many genotypes we're storing
+ public static final int genoTypeCount = GENOTYPE.values().length;
+
+ // the associated negitive log likelihoods for each genotype
+ protected final HashMap likelihood = new HashMap();
+
+ /** create a blank likelihood object */
+ public LikelihoodObject() {
+ for (GENOTYPE type : GENOTYPE.values()) {
+ likelihood.put(type, Double.MAX_VALUE);
+ }
+ }
+
+ /**
+ * create a likelihood object, given a picard style GenotypeLikelihoods object. The
+ * GenotypeLikelihoods stores likelihoods in log likelihood format, and we want them in
+ * negitive log likelihood
+ *
+ * @param lk the likelihood object
+ */
+ public LikelihoodObject( GenotypeLikelihoods lk ) {
+ Double minValue = Double.MAX_VALUE;
+ for (GENOTYPE type : GENOTYPE.values()) {
+ byte[] bases = new byte[2];
+ bases[0] = (byte) type.toString().charAt(0);
+ bases[1] = (byte) type.toString().charAt(1);
+ double val = -1.0d * lk.getLikelihood(DiploidGenotype.fromBases(bases));
+ likelihood.put(type, val);
+ if (val < minValue) { bestGenotype = type; }
+ }
+ }
+
+ /**
+ * create a likelyhood object, given an array of genotype scores in GLFv3 ordering
+ *
+ * @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
+ */
+ public LikelihoodObject( double[] values ) {
+ if (values.length != GENOTYPE.values().length) {
+ throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
+ }
+ int index = 0;
+ double lowestScore = Double.MAX_VALUE;
+ for (GENOTYPE type : GENOTYPE.values()) {
+ likelihood.put(type, values[index]);
+ if (values[index] < lowestScore) {
+ lowestScore = values[index];
+ bestGenotype = type;
+ }
+ ++index;
+ }
+ }
+
+ /**
+ * set the likelihood, given it's probability and the genotype
+ *
+ * @param type the genotype
+ * @param lh the likelihood as a double between 0 and 1, which is converted to a byte
+ */
+ public void setLikelihood( GENOTYPE type, double lh ) {
+ likelihood.put(type, lh);
+ if (lh < likelihood.get(this.bestGenotype)) {
+ this.bestGenotype = type;
+ }
+ }
+
+ /**
+ * find the minimum likelihood value stored in the set. This represents the most likely genotype,
+ * since genotypes are represented as negitive log likeihoods
+ *
+ * @return the min value
+ */
+ public double getBestLikelihood() {
+ return likelihood.get(this.bestGenotype);
+ }
+
+ /**
+ * return a byte array representation of the likelihood object, in GLFv3 specified order.
+ * The return type is short[] instead of byte[], since signed bytes only store -127 to 127,
+ * not the 255 range we need.
+ *
+ * @return a byte array of the genotype values
+ */
+ public short[] toByteArray() {
+ short ret[] = new short[GENOTYPE.values().length];
+ int index = 0;
+ for (GENOTYPE type : GENOTYPE.values()) {
+ ret[index] = ( likelihood.get(type).intValue() > 254 ) ? 255 : (short) likelihood.get(type).intValue();
+ ++index;
+ }
+ return ret;
+ }
+
+ /**
+ * create a float array of our genotype values, in order specified in the GENOTYPE enum (currently the GLF and
+ * geli ordering).
+ *
+ * @return a float array containing our genotype likelihoods, as negitive log likelihoods
+ */
+ public float[] toFloatArray() {
+ // make an array of floats
+ float[] ft = new float[10];
+ int index = 0;
+ for (GENOTYPE T : GENOTYPE.values()) {
+ ft[index] = this.likelihood.get(T).floatValue();
+ }
+ return ft;
+ }
+
+ /**
+ * convert this object, with aditional information, to a GenotypeLikelihoods object. This involves determining
+ * what our underlying storage type is, and coverting our values to the appropriate (log likelihood) format.
+ *
+ * @return a GenotypeLikelihoods object representing our data
+ */
+ public GenotypeLikelihoods convert( SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase ) {
+ float[] ft = toFloatArray();
+
+ if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
+ for (float f : ft) {
+ f = f * -1.0f;
+ }
+ } else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
+ for (float f : ft) {
+ f = (float) Math.log(f);
+ }
+ }
+ return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, ft);
+ }
+
+ /**
+ * getter for the likelihood type
+ * @return our likelihood storage type
+ */
+ public LIKELIHOOD_TYPE getLikelihoodType() {
+ return mLikelihoodType;
+ }
+
+ /**
+ * set our likelihood storage type, and adjust our current likelihood values to reflect
+ * the new setting.
+ * @param likelihood the type to set the values to.
+ */
+ public void setLikelihoodType( LIKELIHOOD_TYPE likelihood ) {
+ if (likelihood == mLikelihoodType)
+ return;
+ if (mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
+ double mult = 1.0;
+ if (likelihood == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
+ mult = -1.0;
+ }
+ for (Double d : this.likelihood.values()) {
+ d = mult * Math.log(d);
+ }
+ }
+ else if (likelihood == LIKELIHOOD_TYPE.RAW) {
+ double mult = 1.0;
+ if (mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
+ mult = -1.0;
+ }
+ for (Double d : this.likelihood.values()) {
+ d = Math.pow(d*mult,10);
+ }
+ }
+ else {
+ // one of us in log, the other negitive log, it doesn't matter which
+ for (Double d : this.likelihood.values()) {
+ d = -1.0 * Math.log(d);
+ }
+ }
+ this.mLikelihoodType = likelihood;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
index 14a574337..026140e43 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java
@@ -206,5 +206,15 @@ abstract class GLFRecord {
public int getByteSize() {
return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1)
}
+
+ /**
+ * convert a double to a byte, capping it at the maximum value of 255
+ * @param d a double value
+ * @return a byte, capped at
+ */
+ protected static short toCappedShort(double d) {
+ return (d > 255.0) ? (byte)255 : (byte)Math.round(d);
+ }
+
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
index f54b5bf1f..40b1b5194 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
@@ -6,33 +6,43 @@ import net.sf.samtools.util.BlockCompressedOutputStream;
import java.io.File;
import java.io.DataOutputStream;
-/**
+import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+/*
+ * Copyright (c) 2009 The Broad Institute
*
- * User: aaron
- * Date: May 13, 2009
- * Time: 3:36:18 PM
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
*
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
*
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*/
-
/**
* @author aaron
* @version 1.0
- *
- * This class writes GLF files. You can either specify GLFRecords, or programaticly generate
- * single and variable length genotype calls using the provided functions. When you've finished
- * generating GLF records, make sure you close the file.
- *
+ *
+ * This class writes GLF files. You can either specify GLFRecords, or programaticly generate
+ * single and variable length genotype calls using the provided functions. When you've finished
+ * generating GLF records, make sure you close the file.
*/
-public class GLFWriter {
+public class GLFWriter implements GenotypeWriter {
// our output codec
private final BinaryCodec outputBinaryCodec;
@@ -62,21 +72,23 @@ public class GLFWriter {
/**
* add a point genotype to the GLF writer
*
- * @param refBase the reference base, as a char
+ * @param refBase the reference base, as a char
* @param genomicLoc the location, as an offset from the previous glf record
- * @param readDepth the read depth at the specified postion
- * @param rmsMapQ the root mean square of the mapping quality
- * @param lhValues the LikelihoodObject, representing the genotype likelyhoods
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
*/
- public void addPointCall( char refBase,
- int genomicLoc,
- int readDepth,
- short rmsMapQ,
- LikelihoodObject lhValues ) {
+ @Override
+ public void addGenotypeCall( int genomicLoc,
+ float rmsMapQ,
+ char refBase,
+ int readDepth,
+ LikelihoodObject lhValues ) {
+
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
readDepth,
- rmsMapQ,
+ (short) rmsMapQ,
lhValues);
call.write(this.outputBinaryCodec);
}
@@ -84,60 +96,66 @@ public class GLFWriter {
/**
* add a variable length (indel, deletion, etc) to the genotype writer
*
- * @param refBase the reference base
- * @param genomicLoc the location, as an offset from the previous glf record
- * @param readDepth the read depth at the specified postion
- * @param rmsMapQ the root mean square of the mapping quality
- * @param minimumLikelihood the minimum likelihood value
- * @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
- * @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
- * @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255
- * @param indelLength1 the length of the first indel allele
- * @param indelLength2 the length of the second indel allele
- * @param indelSeq1 the sequence for the first indel allele
- * @param indelSeq2 the sequence for the second indel allele
+ * @param refBase the reference base
+ * @param genomicLoc the location, as an offset from the previous glf record
+ * @param readDepth the read depth at the specified postion
+ * @param rmsMapQ the root mean square of the mapping quality
+ * @param firstHomZyg the first homozygous call
+ * @param secondHomZyg the second homozygous call
+ * @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
*/
- public void addVarLengthCall( char refBase,
- long genomicLoc,
- int readDepth,
- short rmsMapQ,
- short minimumLikelihood,
- short homozygProb1,
- short homozygProb2,
- short heterozygProb,
- int indelLength1,
- int indelLength2,
- char[] indelSeq1,
- char[] indelSeq2 ) {
+ @Override
+ public void addVariableLengthCall( int genomicLoc,
+ float rmsMapQ,
+ int readDepth,
+ char refBase,
+ IndelLikelihood firstHomZyg,
+ IndelLikelihood secondHomZyg,
+ byte hetLikelihood ) {
- short[] indexSeqEq1 = new short[indelSeq1.length];
- short[] indexSeqEq2 = new short[indelSeq2.length];
- for (int x = 0; x < indelSeq1.length; x++) {
- indexSeqEq1[x] = new Integer(0x00ff & indelSeq1[x]).shortValue();
- }
- for (int x = 0; x < indelSeq2.length; x++) {
- indexSeqEq2[x] = new Integer(0x00ff & indelSeq2[x]).shortValue();
+ // in this context, the minumum likelihood is lowest of the three options
+ double lowestLikelihood = Double.MAX_VALUE;
+ if (firstHomZyg.getLikelihood() < lowestLikelihood) {
+ lowestLikelihood = firstHomZyg.getLikelihood();
+ } else if (secondHomZyg.getLikelihood() < lowestLikelihood) {
+ lowestLikelihood = secondHomZyg.getLikelihood();
+ } else if (hetLikelihood < lowestLikelihood) {
+ lowestLikelihood = hetLikelihood;
}
+ // normalize the two
VariableLengthCall call = new VariableLengthCall(refBase,
genomicLoc,
readDepth,
- minimumLikelihood,
- rmsMapQ,
- homozygProb1,
- homozygProb2,
- heterozygProb,
- indelLength1,
- indelLength2,
- indexSeqEq1,
- indexSeqEq2);
+ lowestLikelihood,
+ (short) rmsMapQ,
+ firstHomZyg.getLikelihood(),
+ secondHomZyg.getLikelihood(),
+ hetLikelihood,
+ firstHomZyg.getLengthOfIndel(),
+ secondHomZyg.getLengthOfIndel(),
+ firstHomZyg.getIndelSequence(),
+ secondHomZyg.getIndelSequence());
call.write(this.outputBinaryCodec);
}
+ /**
+ * add a no call to the genotype file, if supported.
+ *
+ * @param position the position
+ * @param readDepth the read depth
+ */
+ @Override
+ public void addNoCall( int position, int readDepth ) {
+ // glf doesn't support this operation
+ throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
+ }
+
/**
* add a GLF record to the output file
+ *
* @param rec the GLF record to write.
*/
public void addGLFRecord( GLFRecord rec ) {
@@ -149,13 +167,12 @@ public class GLFWriter {
* the magic number, the length of the header text, the text itself, the reference
* sequence (null terminated) preceeded by it's length, and the the genomic
* length of the reference sequence.
- *
*/
private void writeHeader() {
for (int x = 0; x < glfMagic.length; x++) {
outputBinaryCodec.writeByte(glfMagic[x]);
}
- if (!(headerText.equals(""))) {
+ if (!( headerText.equals("") )) {
outputBinaryCodec.writeString(headerText, true, true);
} else {
outputBinaryCodec.writeInt(0);
@@ -167,13 +184,35 @@ public class GLFWriter {
/**
* close the file. You must close the file to ensure any remaining data gets
* written out.
- *
*/
+ @Override
public void close() {
outputBinaryCodec.writeByte((byte) 0);
outputBinaryCodec.close();
}
+ /**
+ * normalize the values to the range of a byte (0 - 255)
+ *
+ * @param values the floating point values to normalize
+ *
+ * @return a byte array containing the normalized values
+ */
+ private byte[] normalizeToByte( double[] values ) {
+ byte ret[] = new byte[values.length];
+ double min = Double.MAX_VALUE;
+ double max = Double.MIN_VALUE;
+ for (double d : values) {
+ min = ( d < min ) ? d : min;
+ max = ( d > max ) ? d : max;
+ }
+ double scale = max / 255.0;
+ for (int x = 0; x < values.length; x++) {
+ ret[x] = (byte) ( ( values[x] - min ) / scale );
+ }
+ return ret;
+ }
+
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObject.java
deleted file mode 100755
index 7d52cf871..000000000
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObject.java
+++ /dev/null
@@ -1,130 +0,0 @@
-package org.broadinstitute.sting.utils.genotype.glf;
-
-import java.util.HashMap;
-
-
-/*
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * @author aaron
- *
- *
- * Class LikelyhoodObject
- *
- * An object used to store likelyhood information for genotypes. Genotype
- * likelihoods are assumed to be zero, unless set. This allows the consumer
- * to make an empty LikelihoodObject, and just set those values which have
- * associated likelihood values.
- */
-public class LikelihoodObject {
-
- // our possible genotypes, in order according to GLFv3
- public enum GENOTYPE {
- AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
- }
-
- // how many genotypes we're storing
- public static final int genoTypeCount = GENOTYPE.values().length;
-
- // the associated negitive log likelihoods for each genotype
- final protected HashMap likelihood = new HashMap();
-
- /** create a blank likelihood object */
- public LikelihoodObject() {
- for (GENOTYPE type : GENOTYPE.values()) {
- likelihood.put(type, 255);
- }
- }
-
- // since the default case it to start with all infinity, we can choose any random base
- protected GENOTYPE greatest = GENOTYPE.AA;
-
-
- /**
- * create a likelyhood object, given an array of genotype scores in GLFv3 ordering
- * @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
- */
- public LikelihoodObject(int[] values) {
- if (values.length != GENOTYPE.values().length) {
- throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
- }
- int index = 0;
- int lowestScore = 256;
- for (GENOTYPE type : GENOTYPE.values()) {
- if (values[index] < 0 || values[index] > 255) {
- throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
- }
- likelihood.put(type, values[index]);
- if (values[index] < lowestScore) {
- lowestScore = values[index];
- greatest = type;
- }
- ++index;
- }
- }
-
- /**
- * set the likelihood, given it's probability and the genotype
- * @param type the genotype
- * @param lh the likelihood as a double between 0 and 1, which is converted to a byte
- */
- public void setLikelihood(GENOTYPE type, int lh) {
- if (lh < 0 || lh > 255) {
- throw new IllegalArgumentException("supplied likelihood must be between 0 and 255");
- }
- likelihood.put(type,lh);
- if (lh < likelihood.get(this.greatest)) {
- this.greatest = type;
- }
- }
-
- /**
- * find the minimum likelihood value stored in the set. This represents the most likely genotype,
- * since genotypes are represented as negitive log likeihoods
- * @return
- */
- public int getMinimumValue() {
- return likelihood.get(this.greatest);
- }
-
- /**
- * return a byte array representation of the likelihood object, in GLFv3 specified order.
- * The return type is short[] instead of byte[], since signed bytes only store -127 to 127,
- * not the 255 range we need.
- * @return a byte array of the genotype values
- */
- public short[] toByteArray() {
- short ret[] = new short[GENOTYPE.values().length];
- int index = 0;
- for (GENOTYPE type : GENOTYPE.values()) {
- ret[index] = (short)likelihood.get(type).intValue();
- ++index;
- }
- return ret;
- }
-
-
-}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
index bf941153d..0ce49c176 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java
@@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
/*
@@ -53,7 +54,7 @@ class SinglePointCall extends GLFRecord {
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
*/
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) {
- super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ);
+ super(refBase, offset, (short) lhValues.getBestLikelihood(), readDepth, rmsMapQ);
likelihoods = lhValues;
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
index 63d3617b1..6c18a2cf4 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java
@@ -70,19 +70,19 @@ class VariableLengthCall extends GLFRecord {
VariableLengthCall( char refBase,
long offset,
int readDepth,
- short minimumLikelihood,
+ double minimumLikelihood,
short rmsMapQ,
- short lkHom1,
- short lkHom2,
- short lkHet,
+ double lkHom1,
+ double lkHom2,
+ double lkHet,
int indelLen1,
int indelLen2,
final short indelSeq1[],
final short indelSeq2[] ) {
- super(refBase, offset, minimumLikelihood, readDepth, rmsMapQ);
- this.lkHom1 = lkHom1;
- this.lkHom2 = lkHom2;
- this.lkHet = lkHet;
+ super(refBase, offset, GLFRecord.toCappedShort(minimumLikelihood), readDepth, rmsMapQ);
+ this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
+ this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
+ this.lkHet = GLFRecord.toCappedShort(lkHet);
this.indelLen1 = indelLen1;
this.indelLen2 = indelLen2;
this.indelSeq1 = indelSeq1;
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java
similarity index 53%
rename from java/src/org/broadinstitute/sting/utils/genotype/Genotype.java
rename to java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java
index 8759c0468..6bb168cb0 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java
@@ -1,5 +1,14 @@
package org.broadinstitute.sting.utils.genotype;
+import org.junit.Test;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+
+import java.io.File;
+
+import net.sf.samtools.SAMFileHeader;
+
/*
* Copyright (c) 2009 The Broad Institute
@@ -30,10 +39,35 @@ package org.broadinstitute.sting.utils.genotype;
*
* @author aaron
*
- * Class Genotype
+ * Class GeliAdapterTest
*
- * The interface for making genotype calls.
+ * Tests the GeliAdapter class
*/
-public interface Genotype {
-
+public class GeliAdapterTest extends BaseTest {
+
+
+ // private our Geli adapter
+ private GenotypeWriter adapter = null;
+
+ /**
+ * test out the likelihood object
+ */
+ @Test
+ public void test1() {
+ File fl = new File("testFile.txt");
+ SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
+ adapter = new GeliAdapter(fl,header);
+ LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods());
+ adapter.addGenotypeCall(100,100,'A',100,obj);
+ adapter.close();
+ }
+
+
+ public double[] createFakeLikelihoods() {
+ double ret[] = new double[10];
+ for (int x = 0; x < 10; x++) {
+ ret[x] = (double)(10.0-x) * 10.0;
+ }
+ return ret;
+ }
}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObjectTest.java b/java/test/org/broadinstitute/sting/utils/genotype/LikelihoodObjectTest.java
similarity index 90%
rename from java/test/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObjectTest.java
rename to java/test/org/broadinstitute/sting/utils/genotype/LikelihoodObjectTest.java
index a6d106b84..644a8a559 100755
--- a/java/test/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObjectTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/LikelihoodObjectTest.java
@@ -1,9 +1,10 @@
-package org.broadinstitute.sting.utils.genotype.glf;
+package org.broadinstitute.sting.utils.genotype;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import static junit.framework.Assert.assertTrue;
@@ -56,7 +57,7 @@ public class LikelihoodObjectTest extends BaseTest {
@Test
public void testConstructionFromArray() {
- int[] ray = new int[10];
+ double[] ray = new double[10];
for (int x = 0; x < 10; x++) {
ray[x] = ( x * 25 );
}
@@ -72,9 +73,9 @@ public class LikelihoodObjectTest extends BaseTest {
@Test
public void testByteArrayReturn() {
- int[] ray = new int[10];
+ double[] ray = new double[10];
for (int x = 0; x < 10; x++) {
- ray[x] = ( x * 25 );
+ ray[x] = ( x * 25.0 );
}
mLO = new LikelihoodObject(ray);
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
@@ -97,14 +98,15 @@ public class LikelihoodObjectTest extends BaseTest {
@Test
public void testGetMinimum() {
- int[] ray = new int[10];
+ double[] ray = new double[10];
for (int x = 0; x < 10; x++) {
- ray[x] = ( 240 );
+ ray[x] = ( 240.0 );
+ ray[x] = ( 240.0 );
}
ray [5] = 0;
mLO = new LikelihoodObject(ray);
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
- short smallest = (short)mLO.getMinimumValue();
+ short smallest = (short)mLO.getBestLikelihood();
assertTrue(smallest == 0);
int index = 0;
short[] ret = mLO.toByteArray();
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java
index feedf1184..11f14bd71 100755
--- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java
@@ -4,7 +4,8 @@ import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
-import org.broadinstitute.sting.utils.genotype.glf.LikelihoodObject;
+import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
+import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.io.File;
@@ -49,7 +50,7 @@ public class GLFWriterTest extends BaseTest {
private final int refLength = 1000;
File writeTo = new File("testGLF.glf");
- private GLFWriter rec;
+ private GenotypeWriter rec;
@Before
public void before() {
@@ -77,11 +78,11 @@ public class GLFWriterTest extends BaseTest {
let = 'G';
break;
}
- try {
+ /*try {
rec.addPointCall(let, location, 10, (short) 10, obj);
} catch (IllegalArgumentException e) {
e.printStackTrace();
- }
+ } */
}