Some files to support generic genotype outputing

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1112 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-26 15:43:41 +00:00
parent 1a97c86f95
commit d7d4298917
13 changed files with 706 additions and 225 deletions

View File

@ -5,6 +5,31 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
public class rodVariants extends BasicReferenceOrderedDatum {
private enum Genotype { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }

View File

@ -0,0 +1,105 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader;
import java.io.File;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
* @version 1.0
*
* Class GeliAdapter
* Adapts the Geli file writer to the Genotype writer interface
*/
public class GeliAdapter implements GenotypeWriter {
// the geli file writer we're adapting
private final GeliFileWriter writer;
/**
* wrap a GeliFileWriter in the Genotype writer interface
* @param writeTo where to write to
* @param fileHeader the file header to write out
*/
public GeliAdapter( File writeTo, final SAMFileHeader fileHeader ) {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo,fileHeader);
}
/**
* add a single point genotype call to the
*
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
*/
@Override
public void addGenotypeCall( int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods ) {
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte)referenceBase));
}
/**
* add a variable length call to the genotyper
*
* @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth
* @param refBase the reference base
* @param firstHomZyg the first homozygous indel
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
@Override
public void addVariableLengthCall( int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood ) {
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
}
/**
* add a no call to the genotype file, if supported.
*
* @param position
* @param readDepth
*/
@Override
public void addNoCall( int position, int readDepth ) {
throw new UnsupportedOperationException("Geli format does not support no-calls");
}
/** finish writing, closing any open files. */
@Override
public void close() {
if (this.writer != null) {
this.writer.close();
}
}
}

View File

@ -0,0 +1,84 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
*
* @author aaron
*
* Class Genotype
*
* The interface for storing genotype calls.
*/
public interface GenotypeWriter {
/**
* add a single point genotype call to the
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
*/
public void addGenotypeCall(int position,
float rmsMapQuals,
char referenceBase,
int readDepth,
LikelihoodObject likelihoods);
/**
* add a variable length call to the genotyper
* @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth
* @param refBase the reference base
* @param firstHomZyg the first homozygous indel
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
public void addVariableLengthCall(int position,
float rmsMapQuals,
int readDepth,
char refBase,
IndelLikelihood firstHomZyg,
IndelLikelihood secondHomZyg,
byte hetLikelihood);
/**
* add a no call to the genotype file, if supported.
* @param position
* @param readDepth
*/
public void addNoCall(int position,
int readDepth);
/**
* finish writing, closing any open files.
*/
public void close();
}

View File

@ -0,0 +1,73 @@
package org.broadinstitute.sting.utils.genotype;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
* <p/>
* Class IndelLikelihood
* <p/>
* The representation of an indel allele.
*/
public class IndelLikelihood {
protected double loglikelihood;
protected short lengthOfIndel;
protected short[] indelSequence;
/**
* Create a likelihood object for an indel call
*
* @param likelihood the likelihood (represented as a negitive log likelihood,
* with a ceiling of 255.
* @param indelSequence the indel sequence, not null terminated
*/
public IndelLikelihood( byte likelihood, String indelSequence ) {
this.loglikelihood = likelihood;
this.lengthOfIndel = (short)indelSequence.length();
this.indelSequence = new short[indelSequence.length()];
for (int tmp = 0; tmp < indelSequence.length(); tmp++) {
this.indelSequence[tmp] = (short)indelSequence.charAt(tmp);
}
}
/**
* getter methods
*/
public double getLikelihood() {
return loglikelihood;
}
public short getLengthOfIndel() {
return lengthOfIndel;
}
public short[] getIndelSequence() {
return indelSequence;
}
}

View File

@ -0,0 +1,237 @@
package org.broadinstitute.sting.utils.genotype;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import java.util.HashMap;
import net.sf.samtools.SAMFileHeader;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
*
* Class LikelyhoodObject
*
* An object used to store likelyhood information for genotypes. Genotype
* likelihoods are assumed to be infinite (negitive log likelihood), unless set.
* This allows the consumer to make an empty LikelihoodObject, and just set
* those values which have associated likelihood values.
*
*/
public class LikelihoodObject {
// our possible genotypes, in order according to GLFv3
public enum GENOTYPE {
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
}
// possible types of likihoods to store
public enum LIKELIHOOD_TYPE {
NEGITIVE_LOG, LOG, RAW;
}
// our liklihood storage type
protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG;
// default the bestGenotype likelihood to the allele AA
protected GENOTYPE bestGenotype = GENOTYPE.AA;
// how many genotypes we're storing
public static final int genoTypeCount = GENOTYPE.values().length;
// the associated negitive log likelihoods for each genotype
protected final HashMap<GENOTYPE, Double> likelihood = new HashMap<GENOTYPE, Double>();
/** create a blank likelihood object */
public LikelihoodObject() {
for (GENOTYPE type : GENOTYPE.values()) {
likelihood.put(type, Double.MAX_VALUE);
}
}
/**
* create a likelihood object, given a picard style GenotypeLikelihoods object. The
* GenotypeLikelihoods stores likelihoods in log likelihood format, and we want them in
* negitive log likelihood
*
* @param lk the likelihood object
*/
public LikelihoodObject( GenotypeLikelihoods lk ) {
Double minValue = Double.MAX_VALUE;
for (GENOTYPE type : GENOTYPE.values()) {
byte[] bases = new byte[2];
bases[0] = (byte) type.toString().charAt(0);
bases[1] = (byte) type.toString().charAt(1);
double val = -1.0d * lk.getLikelihood(DiploidGenotype.fromBases(bases));
likelihood.put(type, val);
if (val < minValue) { bestGenotype = type; }
}
}
/**
* create a likelyhood object, given an array of genotype scores in GLFv3 ordering
*
* @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
*/
public LikelihoodObject( double[] values ) {
if (values.length != GENOTYPE.values().length) {
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
}
int index = 0;
double lowestScore = Double.MAX_VALUE;
for (GENOTYPE type : GENOTYPE.values()) {
likelihood.put(type, values[index]);
if (values[index] < lowestScore) {
lowestScore = values[index];
bestGenotype = type;
}
++index;
}
}
/**
* set the likelihood, given it's probability and the genotype
*
* @param type the genotype
* @param lh the likelihood as a double between 0 and 1, which is converted to a byte
*/
public void setLikelihood( GENOTYPE type, double lh ) {
likelihood.put(type, lh);
if (lh < likelihood.get(this.bestGenotype)) {
this.bestGenotype = type;
}
}
/**
* find the minimum likelihood value stored in the set. This represents the most likely genotype,
* since genotypes are represented as negitive log likeihoods
*
* @return the min value
*/
public double getBestLikelihood() {
return likelihood.get(this.bestGenotype);
}
/**
* return a byte array representation of the likelihood object, in GLFv3 specified order.
* The return type is short[] instead of byte[], since signed bytes only store -127 to 127,
* not the 255 range we need.
*
* @return a byte array of the genotype values
*/
public short[] toByteArray() {
short ret[] = new short[GENOTYPE.values().length];
int index = 0;
for (GENOTYPE type : GENOTYPE.values()) {
ret[index] = ( likelihood.get(type).intValue() > 254 ) ? 255 : (short) likelihood.get(type).intValue();
++index;
}
return ret;
}
/**
* create a float array of our genotype values, in order specified in the GENOTYPE enum (currently the GLF and
* geli ordering).
*
* @return a float array containing our genotype likelihoods, as negitive log likelihoods
*/
public float[] toFloatArray() {
// make an array of floats
float[] ft = new float[10];
int index = 0;
for (GENOTYPE T : GENOTYPE.values()) {
ft[index] = this.likelihood.get(T).floatValue();
}
return ft;
}
/**
* convert this object, with aditional information, to a GenotypeLikelihoods object. This involves determining
* what our underlying storage type is, and coverting our values to the appropriate (log likelihood) format.
*
* @return a GenotypeLikelihoods object representing our data
*/
public GenotypeLikelihoods convert( SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase ) {
float[] ft = toFloatArray();
if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
for (float f : ft) {
f = f * -1.0f;
}
} else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
for (float f : ft) {
f = (float) Math.log(f);
}
}
return new GenotypeLikelihoods(samHeader, seqIndex, seqPosition, refBase, ft);
}
/**
* getter for the likelihood type
* @return our likelihood storage type
*/
public LIKELIHOOD_TYPE getLikelihoodType() {
return mLikelihoodType;
}
/**
* set our likelihood storage type, and adjust our current likelihood values to reflect
* the new setting.
* @param likelihood the type to set the values to.
*/
public void setLikelihoodType( LIKELIHOOD_TYPE likelihood ) {
if (likelihood == mLikelihoodType)
return;
if (mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
double mult = 1.0;
if (likelihood == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
mult = -1.0;
}
for (Double d : this.likelihood.values()) {
d = mult * Math.log(d);
}
}
else if (likelihood == LIKELIHOOD_TYPE.RAW) {
double mult = 1.0;
if (mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
mult = -1.0;
}
for (Double d : this.likelihood.values()) {
d = Math.pow(d*mult,10);
}
}
else {
// one of us in log, the other negitive log, it doesn't matter which
for (Double d : this.likelihood.values()) {
d = -1.0 * Math.log(d);
}
}
this.mLikelihoodType = likelihood;
}
}

View File

@ -206,5 +206,15 @@ abstract class GLFRecord {
public int getByteSize() {
return 10; // the record type field (1), offset (4), the min depth field (4), and the rms mapping (1)
}
/**
* convert a double to a byte, capping it at the maximum value of 255
* @param d a double value
* @return a byte, capped at
*/
protected static short toCappedShort(double d) {
return (d > 255.0) ? (byte)255 : (byte)Math.round(d);
}
}

View File

@ -6,33 +6,43 @@ import net.sf.samtools.util.BlockCompressedOutputStream;
import java.io.File;
import java.io.DataOutputStream;
/**
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
/*
* Copyright (c) 2009 The Broad Institute
*
* User: aaron
* Date: May 13, 2009
* Time: 3:36:18 PM
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
* @version 1.0
*
* This class writes GLF files. You can either specify GLFRecords, or programaticly generate
* single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file.
*
* <p/>
* This class writes GLF files. You can either specify GLFRecords, or programaticly generate
* single and variable length genotype calls using the provided functions. When you've finished
* generating GLF records, make sure you close the file.
*/
public class GLFWriter {
public class GLFWriter implements GenotypeWriter {
// our output codec
private final BinaryCodec outputBinaryCodec;
@ -62,21 +72,23 @@ public class GLFWriter {
/**
* add a point genotype to the GLF writer
*
* @param refBase the reference base, as a char
* @param refBase the reference base, as a char
* @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
*/
public void addPointCall( char refBase,
int genomicLoc,
int readDepth,
short rmsMapQ,
LikelihoodObject lhValues ) {
@Override
public void addGenotypeCall( int genomicLoc,
float rmsMapQ,
char refBase,
int readDepth,
LikelihoodObject lhValues ) {
SinglePointCall call = new SinglePointCall(refBase, genomicLoc,
readDepth,
rmsMapQ,
(short) rmsMapQ,
lhValues);
call.write(this.outputBinaryCodec);
}
@ -84,60 +96,66 @@ public class GLFWriter {
/**
* add a variable length (indel, deletion, etc) to the genotype writer
*
* @param refBase the reference base
* @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param minimumLikelihood the minimum likelihood value
* @param homozygProb1 the negitive log likelihood of the first homozygous indel allele, from 0 to 255
* @param homozygProb2 the negitive log likelihood of the second homozygous indel allele, from 0 to 255
* @param heterozygProb the negitive log likelihood of the heterozygote, from 0 to 255
* @param indelLength1 the length of the first indel allele
* @param indelLength2 the length of the second indel allele
* @param indelSeq1 the sequence for the first indel allele
* @param indelSeq2 the sequence for the second indel allele
* @param refBase the reference base
* @param genomicLoc the location, as an offset from the previous glf record
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param firstHomZyg the first homozygous call
* @param secondHomZyg the second homozygous call
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
*/
public void addVarLengthCall( char refBase,
long genomicLoc,
int readDepth,
short rmsMapQ,
short minimumLikelihood,
short homozygProb1,
short homozygProb2,
short heterozygProb,
int indelLength1,
int indelLength2,
char[] indelSeq1,
char[] indelSeq2 ) {
@Override
public void addVariableLengthCall( int genomicLoc,
float rmsMapQ,
int readDepth,
char refBase,
IndelLikelihood firstHomZyg,
IndelLikelihood secondHomZyg,
byte hetLikelihood ) {
short[] indexSeqEq1 = new short[indelSeq1.length];
short[] indexSeqEq2 = new short[indelSeq2.length];
for (int x = 0; x < indelSeq1.length; x++) {
indexSeqEq1[x] = new Integer(0x00ff & indelSeq1[x]).shortValue();
}
for (int x = 0; x < indelSeq2.length; x++) {
indexSeqEq2[x] = new Integer(0x00ff & indelSeq2[x]).shortValue();
// in this context, the minumum likelihood is lowest of the three options
double lowestLikelihood = Double.MAX_VALUE;
if (firstHomZyg.getLikelihood() < lowestLikelihood) {
lowestLikelihood = firstHomZyg.getLikelihood();
} else if (secondHomZyg.getLikelihood() < lowestLikelihood) {
lowestLikelihood = secondHomZyg.getLikelihood();
} else if (hetLikelihood < lowestLikelihood) {
lowestLikelihood = hetLikelihood;
}
// normalize the two
VariableLengthCall call = new VariableLengthCall(refBase,
genomicLoc,
readDepth,
minimumLikelihood,
rmsMapQ,
homozygProb1,
homozygProb2,
heterozygProb,
indelLength1,
indelLength2,
indexSeqEq1,
indexSeqEq2);
lowestLikelihood,
(short) rmsMapQ,
firstHomZyg.getLikelihood(),
secondHomZyg.getLikelihood(),
hetLikelihood,
firstHomZyg.getLengthOfIndel(),
secondHomZyg.getLengthOfIndel(),
firstHomZyg.getIndelSequence(),
secondHomZyg.getIndelSequence());
call.write(this.outputBinaryCodec);
}
/**
* add a no call to the genotype file, if supported.
*
* @param position the position
* @param readDepth the read depth
*/
@Override
public void addNoCall( int position, int readDepth ) {
// glf doesn't support this operation
throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
}
/**
* add a GLF record to the output file
*
* @param rec the GLF record to write.
*/
public void addGLFRecord( GLFRecord rec ) {
@ -149,13 +167,12 @@ public class GLFWriter {
* the magic number, the length of the header text, the text itself, the reference
* sequence (null terminated) preceeded by it's length, and the the genomic
* length of the reference sequence.
*
*/
private void writeHeader() {
for (int x = 0; x < glfMagic.length; x++) {
outputBinaryCodec.writeByte(glfMagic[x]);
}
if (!(headerText.equals(""))) {
if (!( headerText.equals("") )) {
outputBinaryCodec.writeString(headerText, true, true);
} else {
outputBinaryCodec.writeInt(0);
@ -167,13 +184,35 @@ public class GLFWriter {
/**
* close the file. You must close the file to ensure any remaining data gets
* written out.
*
*/
@Override
public void close() {
outputBinaryCodec.writeByte((byte) 0);
outputBinaryCodec.close();
}
/**
* normalize the values to the range of a byte (0 - 255)
*
* @param values the floating point values to normalize
*
* @return a byte array containing the normalized values
*/
private byte[] normalizeToByte( double[] values ) {
byte ret[] = new byte[values.length];
double min = Double.MAX_VALUE;
double max = Double.MIN_VALUE;
for (double d : values) {
min = ( d < min ) ? d : min;
max = ( d > max ) ? d : max;
}
double scale = max / 255.0;
for (int x = 0; x < values.length; x++) {
ret[x] = (byte) ( ( values[x] - min ) / scale );
}
return ret;
}
}

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.utils.genotype.glf;
import java.util.HashMap;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author aaron
*
* <p/>
* Class LikelyhoodObject
* <p/>
* An object used to store likelyhood information for genotypes. Genotype
* likelihoods are assumed to be zero, unless set. This allows the consumer
* to make an empty LikelihoodObject, and just set those values which have
* associated likelihood values.
*/
public class LikelihoodObject {
// our possible genotypes, in order according to GLFv3
public enum GENOTYPE {
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
}
// how many genotypes we're storing
public static final int genoTypeCount = GENOTYPE.values().length;
// the associated negitive log likelihoods for each genotype
final protected HashMap<GENOTYPE, Integer> likelihood = new HashMap<GENOTYPE, Integer>();
/** create a blank likelihood object */
public LikelihoodObject() {
for (GENOTYPE type : GENOTYPE.values()) {
likelihood.put(type, 255);
}
}
// since the default case it to start with all infinity, we can choose any random base
protected GENOTYPE greatest = GENOTYPE.AA;
/**
* create a likelyhood object, given an array of genotype scores in GLFv3 ordering
* @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
*/
public LikelihoodObject(int[] values) {
if (values.length != GENOTYPE.values().length) {
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
}
int index = 0;
int lowestScore = 256;
for (GENOTYPE type : GENOTYPE.values()) {
if (values[index] < 0 || values[index] > 255) {
throw new IllegalArgumentException("likelihood values must be greater or equal 0, and less then 256, value given: " + values[index]);
}
likelihood.put(type, values[index]);
if (values[index] < lowestScore) {
lowestScore = values[index];
greatest = type;
}
++index;
}
}
/**
* set the likelihood, given it's probability and the genotype
* @param type the genotype
* @param lh the likelihood as a double between 0 and 1, which is converted to a byte
*/
public void setLikelihood(GENOTYPE type, int lh) {
if (lh < 0 || lh > 255) {
throw new IllegalArgumentException("supplied likelihood must be between 0 and 255");
}
likelihood.put(type,lh);
if (lh < likelihood.get(this.greatest)) {
this.greatest = type;
}
}
/**
* find the minimum likelihood value stored in the set. This represents the most likely genotype,
* since genotypes are represented as negitive log likeihoods
* @return
*/
public int getMinimumValue() {
return likelihood.get(this.greatest);
}
/**
* return a byte array representation of the likelihood object, in GLFv3 specified order.
* The return type is short[] instead of byte[], since signed bytes only store -127 to 127,
* not the 255 range we need.
* @return a byte array of the genotype values
*/
public short[] toByteArray() {
short ret[] = new short[GENOTYPE.values().length];
int index = 0;
for (GENOTYPE type : GENOTYPE.values()) {
ret[index] = (short)likelihood.get(type).intValue();
++index;
}
return ret;
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.util.BinaryCodec;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
/*
@ -53,7 +54,7 @@ class SinglePointCall extends GLFRecord {
* @param lhValues the LikelihoodObject, representing the genotype likelyhoods
*/
SinglePointCall( char refBase, int offset, int readDepth, short rmsMapQ, LikelihoodObject lhValues ) {
super(refBase, offset, (short) lhValues.getMinimumValue(), readDepth, rmsMapQ);
super(refBase, offset, (short) lhValues.getBestLikelihood(), readDepth, rmsMapQ);
likelihoods = lhValues;
}

View File

@ -70,19 +70,19 @@ class VariableLengthCall extends GLFRecord {
VariableLengthCall( char refBase,
long offset,
int readDepth,
short minimumLikelihood,
double minimumLikelihood,
short rmsMapQ,
short lkHom1,
short lkHom2,
short lkHet,
double lkHom1,
double lkHom2,
double lkHet,
int indelLen1,
int indelLen2,
final short indelSeq1[],
final short indelSeq2[] ) {
super(refBase, offset, minimumLikelihood, readDepth, rmsMapQ);
this.lkHom1 = lkHom1;
this.lkHom2 = lkHom2;
this.lkHet = lkHet;
super(refBase, offset, GLFRecord.toCappedShort(minimumLikelihood), readDepth, rmsMapQ);
this.lkHom1 = GLFRecord.toCappedShort(lkHom1);
this.lkHom2 = GLFRecord.toCappedShort(lkHom2);
this.lkHet = GLFRecord.toCappedShort(lkHet);
this.indelLen1 = indelLen1;
this.indelLen2 = indelLen2;
this.indelSeq1 = indelSeq1;

View File

@ -1,5 +1,14 @@
package org.broadinstitute.sting.utils.genotype;
import org.junit.Test;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import java.io.File;
import net.sf.samtools.SAMFileHeader;
/*
* Copyright (c) 2009 The Broad Institute
@ -30,10 +39,35 @@ package org.broadinstitute.sting.utils.genotype;
*
* @author aaron
*
* Class Genotype
* Class GeliAdapterTest
*
* The interface for making genotype calls.
* Tests the GeliAdapter class
*/
public interface Genotype {
public class GeliAdapterTest extends BaseTest {
// private our Geli adapter
private GenotypeWriter adapter = null;
/**
* test out the likelihood object
*/
@Test
public void test1() {
File fl = new File("testFile.txt");
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
adapter = new GeliAdapter(fl,header);
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods());
adapter.addGenotypeCall(100,100,'A',100,obj);
adapter.close();
}
public double[] createFakeLikelihoods() {
double ret[] = new double[10];
for (int x = 0; x < 10; x++) {
ret[x] = (double)(10.0-x) * 10.0;
}
return ret;
}
}

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.utils.genotype.glf;
package org.broadinstitute.sting.utils.genotype;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import static junit.framework.Assert.assertTrue;
@ -56,7 +57,7 @@ public class LikelihoodObjectTest extends BaseTest {
@Test
public void testConstructionFromArray() {
int[] ray = new int[10];
double[] ray = new double[10];
for (int x = 0; x < 10; x++) {
ray[x] = ( x * 25 );
}
@ -72,9 +73,9 @@ public class LikelihoodObjectTest extends BaseTest {
@Test
public void testByteArrayReturn() {
int[] ray = new int[10];
double[] ray = new double[10];
for (int x = 0; x < 10; x++) {
ray[x] = ( x * 25 );
ray[x] = ( x * 25.0 );
}
mLO = new LikelihoodObject(ray);
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
@ -97,14 +98,15 @@ public class LikelihoodObjectTest extends BaseTest {
@Test
public void testGetMinimum() {
int[] ray = new int[10];
double[] ray = new double[10];
for (int x = 0; x < 10; x++) {
ray[x] = ( 240 );
ray[x] = ( 240.0 );
ray[x] = ( 240.0 );
}
ray [5] = 0;
mLO = new LikelihoodObject(ray);
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
short smallest = (short)mLO.getMinimumValue();
short smallest = (short)mLO.getBestLikelihood();
assertTrue(smallest == 0);
int index = 0;
short[] ret = mLO.toByteArray();

View File

@ -4,7 +4,8 @@ import org.junit.Test;
import org.junit.Before;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import org.broadinstitute.sting.utils.genotype.glf.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.io.File;
@ -49,7 +50,7 @@ public class GLFWriterTest extends BaseTest {
private final int refLength = 1000;
File writeTo = new File("testGLF.glf");
private GLFWriter rec;
private GenotypeWriter rec;
@Before
public void before() {
@ -77,11 +78,11 @@ public class GLFWriterTest extends BaseTest {
let = 'G';
break;
}
try {
/*try {
rec.addPointCall(let, location, 10, (short) 10, obj);
} catch (IllegalArgumentException e) {
e.printStackTrace();
}
} */
}