diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java index 8026b1041..771fe5117 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodVariants.java @@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.IOException; +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + public class rodVariants extends BasicReferenceOrderedDatum { private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java new file mode 100644 index 000000000..282498f2e --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GeliAdapter.java @@ -0,0 +1,105 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import edu.mit.broad.picard.genotype.geli.GeliFileWriter; +import net.sf.samtools.SAMFileHeader; + +import java.io.File; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + * @version 1.0 + * + * Class GeliAdapter + * Adapts the Geli file writer to the Genotype writer interface + */ +public class GeliAdapter implements GenotypeWriter { + + + // the geli file writer we're adapting + private final GeliFileWriter writer; + + /** + * wrap a GeliFileWriter in the Genotype writer interface + * @param writeTo where to write to + * @param fileHeader the file header to write out + */ + public GeliAdapter( File writeTo, final SAMFileHeader fileHeader ) { + this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo,fileHeader); + } + + + /** + * add a single point genotype call to the + * + * @param position the position on the contig + * @param referenceBase the reference base + * @param readDepth the read depth at the specified position + * @param likelihoods the likelihoods of each of the possible alleles + */ + @Override + public void addGenotypeCall( int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods ) { + writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte)referenceBase)); + } + + /** + * add a variable length call to the genotyper + * + * @param position the position on the genome + * @param rmsMapQuals the root mean square of the mapping qualities + * @param readDepth the read depth + * @param refBase the reference base + * @param firstHomZyg the first homozygous indel + * @param secondHomZyg the second homozygous indel (if present, null if not) + * @param hetLikelihood the heterozygous likelihood + */ + @Override + public void addVariableLengthCall( int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood ) { + throw new UnsupportedOperationException("Geli format does not support variable length allele calls"); + } + + /** + * add a no call to the genotype file, if supported. + * + * @param position + * @param readDepth + */ + @Override + public void addNoCall( int position, int readDepth ) { + throw new UnsupportedOperationException("Geli format does not support no-calls"); + } + + /** finish writing, closing any open files. */ + @Override + public void close() { + if (this.writer != null) { + this.writer.close(); + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java new file mode 100644 index 000000000..64b0af488 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -0,0 +1,84 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * + * @author aaron + * + * Class Genotype + * + * The interface for storing genotype calls. + */ +public interface GenotypeWriter { + /** + * add a single point genotype call to the + * @param position the position on the contig + * @param referenceBase the reference base + * @param readDepth the read depth at the specified position + * @param likelihoods the likelihoods of each of the possible alleles + */ + public void addGenotypeCall(int position, + float rmsMapQuals, + char referenceBase, + int readDepth, + LikelihoodObject likelihoods); + + /** + * add a variable length call to the genotyper + * @param position the position on the genome + * @param rmsMapQuals the root mean square of the mapping qualities + * @param readDepth the read depth + * @param refBase the reference base + * @param firstHomZyg the first homozygous indel + * @param secondHomZyg the second homozygous indel (if present, null if not) + * @param hetLikelihood the heterozygous likelihood + */ + public void addVariableLengthCall(int position, + float rmsMapQuals, + int readDepth, + char refBase, + IndelLikelihood firstHomZyg, + IndelLikelihood secondHomZyg, + byte hetLikelihood); + + /** + * add a no call to the genotype file, if supported. + * @param position + * @param readDepth + */ + public void addNoCall(int position, + int readDepth); + + /** + * finish writing, closing any open files. + */ + public void close(); + +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/IndelLikelihood.java b/java/src/org/broadinstitute/sting/utils/genotype/IndelLikelihood.java new file mode 100644 index 000000000..1800f023c --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/IndelLikelihood.java @@ -0,0 +1,73 @@ +package org.broadinstitute.sting.utils.genotype; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + *

+ * Class IndelLikelihood + *

+ * The representation of an indel allele. + */ +public class IndelLikelihood { + + protected double loglikelihood; + protected short lengthOfIndel; + protected short[] indelSequence; + + /** + * Create a likelihood object for an indel call + * + * @param likelihood the likelihood (represented as a negitive log likelihood, + * with a ceiling of 255. + * @param indelSequence the indel sequence, not null terminated + */ + public IndelLikelihood( byte likelihood, String indelSequence ) { + this.loglikelihood = likelihood; + this.lengthOfIndel = (short)indelSequence.length(); + this.indelSequence = new short[indelSequence.length()]; + for (int tmp = 0; tmp < indelSequence.length(); tmp++) { + this.indelSequence[tmp] = (short)indelSequence.charAt(tmp); + } + } + + /** + * getter methods + */ + + public double getLikelihood() { + return loglikelihood; + } + + public short getLengthOfIndel() { + return lengthOfIndel; + } + + public short[] getIndelSequence() { + return indelSequence; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java new file mode 100755 index 000000000..d41688cc2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java @@ -0,0 +1,237 @@ +package org.broadinstitute.sting.utils.genotype; + +import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; +import edu.mit.broad.picard.genotype.DiploidGenotype; + +import java.util.HashMap; + +import net.sf.samtools.SAMFileHeader; + + +/* + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author aaron + * + * Class LikelyhoodObject + * + * An object used to store likelyhood information for genotypes. Genotype + * likelihoods are assumed to be infinite (negitive log likelihood), unless set. + * This allows the consumer to make an empty LikelihoodObject, and just set + * those values which have associated likelihood values. + * + */ +public class LikelihoodObject { + + // our possible genotypes, in order according to GLFv3 + public enum GENOTYPE { + AA, AT, AC, AG, CC, CT, CG, GG, GT, TT + } + + // possible types of likihoods to store + public enum LIKELIHOOD_TYPE { + NEGITIVE_LOG, LOG, RAW; + } + + // our liklihood storage type + protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG; + + // default the bestGenotype likelihood to the allele AA + protected GENOTYPE bestGenotype = GENOTYPE.AA; + + // how many genotypes we're storing + public static final int genoTypeCount = GENOTYPE.values().length; + + // the associated negitive log likelihoods for each genotype + protected final HashMap likelihood = new HashMap(); + + /** create a blank likelihood object */ + public LikelihoodObject() { + for (GENOTYPE type : GENOTYPE.values()) { + likelihood.put(type, Double.MAX_VALUE); + } + } + + /** + * create a likelihood object, given a picard style GenotypeLikelihoods object. The + * GenotypeLikelihoods stores likelihoods in log likelihood format, and we want them in + * negitive log likelihood + * + * @param lk the likelihood object + */ + public LikelihoodObject( GenotypeLikelihoods lk ) { + Double minValue = Double.MAX_VALUE; + for (GENOTYPE type : GENOTYPE.values()) { + byte[] bases = new byte[2]; + bases[0] = (byte) type.toString().charAt(0); + bases[1] = (byte) type.toString().charAt(1); + double val = -1.0d * lk.getLikelihood(DiploidGenotype.fromBases(bases)); + likelihood.put(type, val); + if (val < minValue) { bestGenotype = type; } + } + } + + /** + * create a likelyhood object, given an array of genotype scores in GLFv3 ordering + * + * @param values an array of int's from 0 to 255, representing the negitive log likelihoods. + */ + public LikelihoodObject( double[] values ) { + if (values.length != GENOTYPE.values().length) { + throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length); + } + int index = 0; + double lowestScore = Double.MAX_VALUE; + for (GENOTYPE type : GENOTYPE.values()) { + likelihood.put(type, values[index]); + if (values[index] < lowestScore) { + lowestScore = values[index]; + bestGenotype = type; + } + ++index; + } + } + + /** + * set the likelihood, given it's probability and the genotype + * + * @param type the genotype + * @param lh the likelihood as a double between 0 and 1, which is converted to a byte + */ + public void setLikelihood( GENOTYPE type, double lh ) { + likelihood.put(type, lh); + if (lh < likelihood.get(this.bestGenotype)) { + this.bestGenotype = type; + } + } + + /** + * find the minimum likelihood value stored in the set. This represents the most likely genotype, + * since genotypes are represented as negitive log likeihoods + * + * @return the min value + */ + public double getBestLikelihood() { + return likelihood.get(this.bestGenotype); + } + + /** + * return a byte array representation of the likelihood object, in GLFv3 specified order. + * The return type is short[] instead of byte[], since signed bytes only store -127 to 127, + * not the 255 range we need. + * + * @return a byte array of the genotype values + */ + public short[] toByteArray() { + short ret[] = new short[GENOTYPE.values().length]; + int index = 0; + for (GENOTYPE type : GENOTYPE.values()) { + ret[index] = ( likelihood.get(type).intValue() > 254 ) ? 255 : (short) likelihood.get(type).intValue(); + ++index; + } + return ret; + } + + /** + * create a float array of our genotype values, in order specified in the GENOTYPE enum (currently the GLF and + * geli ordering). + * + * @return a float array containing our genotype likelihoods, as negitive log likelihoods + */ + public float[] toFloatArray() { + // make an array of floats + float[] ft = new float[10]; + int index = 0; + for (GENOTYPE T : GENOTYPE.values()) { + ft[index] = this.likelihood.get(T).floatValue(); + } + return ft; + } + + /** + * convert this object, with aditional information, to a GenotypeLikelihoods object. This involves determining + * what our underlying storage type is, and coverting our values to the appropriate (log likelihood) format. + * + * @return a GenotypeLikelihoods object representing our data + */ + public GenotypeLikelihoods convert( SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase ) { + float[] ft = toFloatArray(); + + if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) { + for (float f : ft) { + f = f * -1.0f; + } + } else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) { + for (float f : ft) { + f = (float) Math.log(f); + } + } + return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, ft); + } + + /** + * getter for the likelihood type + * @return our likelihood storage type + */ + public LIKELIHOOD_TYPE getLikelihoodType() { + return mLikelihoodType; + } + + /** + * set our likelihood storage type, and adjust our current likelihood values to reflect + * the new setting. + * @param likelihood the type to set the values to. + */ + public void setLikelihoodType( LIKELIHOOD_TYPE likelihood ) { + if (likelihood == mLikelihoodType) + return; + if (mLikelihoodType == LIKELIHOOD_TYPE.RAW) { + double mult = 1.0; + if (likelihood == LIKELIHOOD_TYPE.NEGITIVE_LOG) { + mult = -1.0; + } + for (Double d : this.likelihood.values()) { + d = mult * Math.log(d); + } + } + else if (likelihood == LIKELIHOOD_TYPE.RAW) { + double mult = 1.0; + if (mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) { + mult = -1.0; + } + for (Double d : this.likelihood.values()) { + d = Math.pow(d*mult,10); + } + } + else { + // one of us in log, the other negitive log, it doesn't matter which + for (Double d : this.likelihood.values()) { + d = -1.0 * Math.log(d); + } + } + this.mLikelihoodType = likelihood; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index 14a574337..026140e43 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -206,5 +206,15 @@ abstract class GLFRecord { public int getByteSize() { return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1) } + + /** + * convert a double to a byte, capping it at the maximum value of 255 + * @param d a double value + * @return a byte, capped at + */ + protected static short toCappedShort(double d) { + return (d > 255.0) ? (byte)255 : (byte)Math.round(d); + } + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index f54b5bf1f..40b1b5194 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -6,33 +6,43 @@ import net.sf.samtools.util.BlockCompressedOutputStream; import java.io.File; import java.io.DataOutputStream; -/** +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.IndelLikelihood; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +/* + * Copyright (c) 2009 The Broad Institute * - * User: aaron - * Date: May 13, 2009 - * Time: 3:36:18 PM + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: * - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2009 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. Neither - * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ - /** * @author aaron * @version 1.0 - * - * This class writes GLF files. You can either specify GLFRecords, or programaticly generate - * single and variable length genotype calls using the provided functions. When you've finished - * generating GLF records, make sure you close the file. - * + *

+ * This class writes GLF files. You can either specify GLFRecords, or programaticly generate + * single and variable length genotype calls using the provided functions. When you've finished + * generating GLF records, make sure you close the file. */ -public class GLFWriter { +public class GLFWriter implements GenotypeWriter { // our output codec private final BinaryCodec outputBinaryCodec; @@ -62,21 +72,23 @@ public class GLFWriter { /** * add a point genotype to the GLF writer * - * @param refBase the reference base, as a char + * @param refBase the reference base, as a char * @param genomicLoc the location, as an offset from the previous glf record - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param lhValues the LikelihoodObject, representing the genotype likelyhoods + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods */ - public void addPointCall( char refBase, - int genomicLoc, - int readDepth, - short rmsMapQ, - LikelihoodObject lhValues ) { + @Override + public void addGenotypeCall( int genomicLoc, + float rmsMapQ, + char refBase, + int readDepth, + LikelihoodObject lhValues ) { + SinglePointCall call = new SinglePointCall(refBase, genomicLoc, readDepth, - rmsMapQ, + (short) rmsMapQ, lhValues); call.write(this.outputBinaryCodec); } @@ -84,60 +96,66 @@ public class GLFWriter { /** * add a variable length (indel, deletion, etc) to the genotype writer * - * @param refBase the reference base - * @param genomicLoc the location, as an offset from the previous glf record - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param minimumLikelihood the minimum likelihood value - * @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255 - * @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255 - * @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255 - * @param indelLength1 the length of the first indel allele - * @param indelLength2 the length of the second indel allele - * @param indelSeq1 the sequence for the first indel allele - * @param indelSeq2 the sequence for the second indel allele + * @param refBase the reference base + * @param genomicLoc the location, as an offset from the previous glf record + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param firstHomZyg the first homozygous call + * @param secondHomZyg the second homozygous call + * @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255 */ - public void addVarLengthCall( char refBase, - long genomicLoc, - int readDepth, - short rmsMapQ, - short minimumLikelihood, - short homozygProb1, - short homozygProb2, - short heterozygProb, - int indelLength1, - int indelLength2, - char[] indelSeq1, - char[] indelSeq2 ) { + @Override + public void addVariableLengthCall( int genomicLoc, + float rmsMapQ, + int readDepth, + char refBase, + IndelLikelihood firstHomZyg, + IndelLikelihood secondHomZyg, + byte hetLikelihood ) { - short[] indexSeqEq1 = new short[indelSeq1.length]; - short[] indexSeqEq2 = new short[indelSeq2.length]; - for (int x = 0; x < indelSeq1.length; x++) { - indexSeqEq1[x] = new Integer(0x00ff & indelSeq1[x]).shortValue(); - } - for (int x = 0; x < indelSeq2.length; x++) { - indexSeqEq2[x] = new Integer(0x00ff & indelSeq2[x]).shortValue(); + // in this context, the minumum likelihood is lowest of the three options + double lowestLikelihood = Double.MAX_VALUE; + if (firstHomZyg.getLikelihood() < lowestLikelihood) { + lowestLikelihood = firstHomZyg.getLikelihood(); + } else if (secondHomZyg.getLikelihood() < lowestLikelihood) { + lowestLikelihood = secondHomZyg.getLikelihood(); + } else if (hetLikelihood < lowestLikelihood) { + lowestLikelihood = hetLikelihood; } + // normalize the two VariableLengthCall call = new VariableLengthCall(refBase, genomicLoc, readDepth, - minimumLikelihood, - rmsMapQ, - homozygProb1, - homozygProb2, - heterozygProb, - indelLength1, - indelLength2, - indexSeqEq1, - indexSeqEq2); + lowestLikelihood, + (short) rmsMapQ, + firstHomZyg.getLikelihood(), + secondHomZyg.getLikelihood(), + hetLikelihood, + firstHomZyg.getLengthOfIndel(), + secondHomZyg.getLengthOfIndel(), + firstHomZyg.getIndelSequence(), + secondHomZyg.getIndelSequence()); call.write(this.outputBinaryCodec); } + /** + * add a no call to the genotype file, if supported. + * + * @param position the position + * @param readDepth the read depth + */ + @Override + public void addNoCall( int position, int readDepth ) { + // glf doesn't support this operation + throw new UnsupportedOperationException("GLF doesn't support a 'no call' call."); + } + /** * add a GLF record to the output file + * * @param rec the GLF record to write. */ public void addGLFRecord( GLFRecord rec ) { @@ -149,13 +167,12 @@ public class GLFWriter { * the magic number, the length of the header text, the text itself, the reference * sequence (null terminated) preceeded by it's length, and the the genomic * length of the reference sequence. - * */ private void writeHeader() { for (int x = 0; x < glfMagic.length; x++) { outputBinaryCodec.writeByte(glfMagic[x]); } - if (!(headerText.equals(""))) { + if (!( headerText.equals("") )) { outputBinaryCodec.writeString(headerText, true, true); } else { outputBinaryCodec.writeInt(0); @@ -167,13 +184,35 @@ public class GLFWriter { /** * close the file. You must close the file to ensure any remaining data gets * written out. - * */ + @Override public void close() { outputBinaryCodec.writeByte((byte) 0); outputBinaryCodec.close(); } + /** + * normalize the values to the range of a byte (0 - 255) + * + * @param values the floating point values to normalize + * + * @return a byte array containing the normalized values + */ + private byte[] normalizeToByte( double[] values ) { + byte ret[] = new byte[values.length]; + double min = Double.MAX_VALUE; + double max = Double.MIN_VALUE; + for (double d : values) { + min = ( d < min ) ? d : min; + max = ( d > max ) ? d : max; + } + double scale = max / 255.0; + for (int x = 0; x < values.length; x++) { + ret[x] = (byte) ( ( values[x] - min ) / scale ); + } + return ret; + } + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObject.java deleted file mode 100755 index 7d52cf871..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObject.java +++ /dev/null @@ -1,130 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.glf; - -import java.util.HashMap; - - -/* - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * @author aaron - * - *

- * Class LikelyhoodObject - *

- * An object used to store likelyhood information for genotypes. Genotype - * likelihoods are assumed to be zero, unless set. This allows the consumer - * to make an empty LikelihoodObject, and just set those values which have - * associated likelihood values. - */ -public class LikelihoodObject { - - // our possible genotypes, in order according to GLFv3 - public enum GENOTYPE { - AA, AT, AC, AG, CC, CT, CG, GG, GT, TT - } - - // how many genotypes we're storing - public static final int genoTypeCount = GENOTYPE.values().length; - - // the associated negitive log likelihoods for each genotype - final protected HashMap likelihood = new HashMap(); - - /** create a blank likelihood object */ - public LikelihoodObject() { - for (GENOTYPE type : GENOTYPE.values()) { - likelihood.put(type, 255); - } - } - - // since the default case it to start with all infinity, we can choose any random base - protected GENOTYPE greatest = GENOTYPE.AA; - - - /** - * create a likelyhood object, given an array of genotype scores in GLFv3 ordering - * @param values an array of int's from 0 to 255, representing the negitive log likelihoods. - */ - public LikelihoodObject(int[] values) { - if (values.length != GENOTYPE.values().length) { - throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length); - } - int index = 0; - int lowestScore = 256; - for (GENOTYPE type : GENOTYPE.values()) { - if (values[index] < 0 || values[index] > 255) { - throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]); - } - likelihood.put(type, values[index]); - if (values[index] < lowestScore) { - lowestScore = values[index]; - greatest = type; - } - ++index; - } - } - - /** - * set the likelihood, given it's probability and the genotype - * @param type the genotype - * @param lh the likelihood as a double between 0 and 1, which is converted to a byte - */ - public void setLikelihood(GENOTYPE type, int lh) { - if (lh < 0 || lh > 255) { - throw new IllegalArgumentException("supplied likelihood must be between 0 and 255"); - } - likelihood.put(type,lh); - if (lh < likelihood.get(this.greatest)) { - this.greatest = type; - } - } - - /** - * find the minimum likelihood value stored in the set. This represents the most likely genotype, - * since genotypes are represented as negitive log likeihoods - * @return - */ - public int getMinimumValue() { - return likelihood.get(this.greatest); - } - - /** - * return a byte array representation of the likelihood object, in GLFv3 specified order. - * The return type is short[] instead of byte[], since signed bytes only store -127 to 127, - * not the 255 range we need. - * @return a byte array of the genotype values - */ - public short[] toByteArray() { - short ret[] = new short[GENOTYPE.values().length]; - int index = 0; - for (GENOTYPE type : GENOTYPE.values()) { - ret[index] = (short)likelihood.get(type).intValue(); - ++index; - } - return ret; - } - - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java index bf941153d..0ce49c176 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/SinglePointCall.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.util.BinaryCodec; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; /* @@ -53,7 +54,7 @@ class SinglePointCall extends GLFRecord { * @param lhValues the LikelihoodObject, representing the genotype likelyhoods */ SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) { - super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ); + super(refBase, offset, (short) lhValues.getBestLikelihood(), readDepth, rmsMapQ); likelihoods = lhValues; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java index 63d3617b1..6c18a2cf4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/VariableLengthCall.java @@ -70,19 +70,19 @@ class VariableLengthCall extends GLFRecord { VariableLengthCall( char refBase, long offset, int readDepth, - short minimumLikelihood, + double minimumLikelihood, short rmsMapQ, - short lkHom1, - short lkHom2, - short lkHet, + double lkHom1, + double lkHom2, + double lkHet, int indelLen1, int indelLen2, final short indelSeq1[], final short indelSeq2[] ) { - super(refBase, offset, minimumLikelihood, readDepth, rmsMapQ); - this.lkHom1 = lkHom1; - this.lkHom2 = lkHom2; - this.lkHet = lkHet; + super(refBase, offset, GLFRecord.toCappedShort(minimumLikelihood), readDepth, rmsMapQ); + this.lkHom1 = GLFRecord.toCappedShort(lkHom1); + this.lkHom2 = GLFRecord.toCappedShort(lkHom2); + this.lkHet = GLFRecord.toCappedShort(lkHet); this.indelLen1 = indelLen1; this.indelLen2 = indelLen2; this.indelSeq1 = indelSeq1; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java similarity index 53% rename from java/src/org/broadinstitute/sting/utils/genotype/Genotype.java rename to java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java index 8759c0468..6bb168cb0 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/GeliAdapterTest.java @@ -1,5 +1,14 @@ package org.broadinstitute.sting.utils.genotype; +import org.junit.Test; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; + +import java.io.File; + +import net.sf.samtools.SAMFileHeader; + /* * Copyright (c) 2009 The Broad Institute @@ -30,10 +39,35 @@ package org.broadinstitute.sting.utils.genotype; * * @author aaron * - * Class Genotype + * Class GeliAdapterTest * - * The interface for making genotype calls. + * Tests the GeliAdapter class */ -public interface Genotype { - +public class GeliAdapterTest extends BaseTest { + + + // private our Geli adapter + private GenotypeWriter adapter = null; + + /** + * test out the likelihood object + */ + @Test + public void test1() { + File fl = new File("testFile.txt"); + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10); + adapter = new GeliAdapter(fl,header); + LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods()); + adapter.addGenotypeCall(100,100,'A',100,obj); + adapter.close(); + } + + + public double[] createFakeLikelihoods() { + double ret[] = new double[10]; + for (int x = 0; x < 10; x++) { + ret[x] = (double)(10.0-x) * 10.0; + } + return ret; + } } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObjectTest.java b/java/test/org/broadinstitute/sting/utils/genotype/LikelihoodObjectTest.java similarity index 90% rename from java/test/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObjectTest.java rename to java/test/org/broadinstitute/sting/utils/genotype/LikelihoodObjectTest.java index a6d106b84..644a8a559 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/LikelihoodObjectTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/LikelihoodObjectTest.java @@ -1,9 +1,10 @@ -package org.broadinstitute.sting.utils.genotype.glf; +package org.broadinstitute.sting.utils.genotype; import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import static junit.framework.Assert.assertTrue; @@ -56,7 +57,7 @@ public class LikelihoodObjectTest extends BaseTest { @Test public void testConstructionFromArray() { - int[] ray = new int[10]; + double[] ray = new double[10]; for (int x = 0; x < 10; x++) { ray[x] = ( x * 25 ); } @@ -72,9 +73,9 @@ public class LikelihoodObjectTest extends BaseTest { @Test public void testByteArrayReturn() { - int[] ray = new int[10]; + double[] ray = new double[10]; for (int x = 0; x < 10; x++) { - ray[x] = ( x * 25 ); + ray[x] = ( x * 25.0 ); } mLO = new LikelihoodObject(ray); assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); @@ -97,14 +98,15 @@ public class LikelihoodObjectTest extends BaseTest { @Test public void testGetMinimum() { - int[] ray = new int[10]; + double[] ray = new double[10]; for (int x = 0; x < 10; x++) { - ray[x] = ( 240 ); + ray[x] = ( 240.0 ); + ray[x] = ( 240.0 ); } ray [5] = 0; mLO = new LikelihoodObject(ray); assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length); - short smallest = (short)mLO.getMinimumValue(); + short smallest = (short)mLO.getBestLikelihood(); assertTrue(smallest == 0); int index = 0; short[] ret = mLO.toByteArray(); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index feedf1184..11f14bd71 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -4,7 +4,8 @@ import org.junit.Test; import org.junit.Before; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; -import org.broadinstitute.sting.utils.genotype.glf.LikelihoodObject; +import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import java.io.File; @@ -49,7 +50,7 @@ public class GLFWriterTest extends BaseTest { private final int refLength = 1000; File writeTo = new File("testGLF.glf"); - private GLFWriter rec; + private GenotypeWriter rec; @Before public void before() { @@ -77,11 +78,11 @@ public class GLFWriterTest extends BaseTest { let = 'G'; break; } - try { + /*try { rec.addPointCall(let, location, 10, (short) 10, obj); } catch (IllegalArgumentException e) { e.printStackTrace(); - } + } */ }