diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java index 149ad8843..4a6729e97 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -154,7 +154,7 @@ public class BasicGenotype implements Genotype { * @return */ @Override - public Variant toVariant() { + public org.broadinstitute.sting.utils.genotype.variant.Variant toVariant() { return null; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index fd9141275..88f7800a0 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -1,7 +1,8 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; +import org.broadinstitute.sting.utils.genotype.variant.Variant; /** * @author aaron @@ -13,6 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; public interface Genotype { /** * get the confidence score + * * @return get the confidence score that we're based on */ public ConfidenceScore getConfidenceScore(); @@ -26,6 +28,7 @@ public interface Genotype { /** * get the ploidy + * * @return the ploidy value */ public int getPloidy(); @@ -46,25 +49,30 @@ public interface Genotype { /** * get the genotype's location + * * @return a GenomeLoc representing the location */ public GenomeLoc getLocation(); /** * returns true if the genotype is a point genotype, false if it's a indel / deletion + * * @return true is a SNP */ public boolean isPointGenotype(); /** * given the reference, are we a variant? 
(non-ref) + * * @param ref the reference base or bases + * * @return true if we're a variant */ public boolean isVariant(char ref); /** * return this genotype as a variant + * * @return */ public Variant toVariant(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java new file mode 100644 index 000000000..623c5b831 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java @@ -0,0 +1,47 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall; + +/** + * @author aaron + *
+ * Interface GenotypeOutput + * + * This interface is in addition to the GenotypeCall interface, + * but adds the required functionality that output (GenotypeWriter) interfaces + * need. It's fair game to return 0 or any value for these fields, but the methods + * are all used by various output writers + */ +public interface GenotypeOutput extends GenotypeCall { + + /** + * return the likelihoods as a double array, in lexicographic order + * + * @return the likelihoods + */ + public double[] getLikelihoods(); + + /** + * get the depth of the reads at this location + * @return the depth + */ + public int getReadDepth(); + + /** + * get the rms mapping qualities + * @return the rms mapping quality + */ + public double getRmsMapping(); + + /** + * get the score of the best genotype compared to the next best genotype + * @return the best vs. next-best score + */ + public double getBestNext(); + + /** + * get the score of the best genotype compared to the reference + * @return the best vs. reference score + */ + public double getBestRef(); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index d74a06e5a..26aec06aa 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -38,7 +38,7 @@ public interface GenotypeWriter { * Add a genotype, given a genotype locus * @param locus the locus to add */ - public void addGenotypeCall(org.broadinstitute.sting.utils.genotype.calls.GenotypeCall locus); + public void addGenotypeCall(GenotypeOutput locus); /** * add a no call to the genotype file, if supported. 
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java index eb7036704..782601d97 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/TabularLFWriter.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall; import java.io.File; import java.io.FileNotFoundException; @@ -42,19 +41,23 @@ public class TabularLFWriter implements GenotypeWriter { * @param locus the locus to add */ @Override - public void addGenotypeCall(GenotypeCall locus) { - /*outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s", - locus.getLocation(), + public void addGenotypeCall(GenotypeOutput locus) { + /* + * This output is not correct, but I don't think we even use this format anymore. If we do, someone + * should change this code + */ + outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s", + locus.getLocation().toString(), "NOT OUTPUTED", locus.getReferencebase(), - locus.getGenotypes().get(1).getBases()., - genotype(), - qhat, - qstar, - lodVsRef, - lodVsNextBest, - depth, - bases); */ + locus.getBases(), + locus.getBases(), + -1.0, /* qhat placeholder; must be a double because %f rejects an Integer argument */ + -1.0, /* qstar placeholder; must be a double because %f rejects an Integer argument */ + locus.getBestRef(), + locus.getBestNext(), + locus.getReadDepth(), + locus.getBases())); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java deleted file mode 100644 index 0a06ac1f2..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java +++ /dev/null @@ -1,11 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -/** - * @author aaron - * - * Interface Variant - * - * This class represents a variant - */ -public interface Variant { -} diff --git 
a/java/src/org/broadinstitute/sting/utils/genotype/calls/SSGGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/calls/SSGGenotypeCall.java index 05b0cf994..3a275c71d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/calls/SSGGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/calls/SSGGenotypeCall.java @@ -6,10 +6,11 @@ import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.GenotypeOutput; import org.broadinstitute.sting.utils.genotype.LexigraphicalComparator; -import org.broadinstitute.sting.utils.genotype.Variant; import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; +import org.broadinstitute.sting.utils.genotype.variant.Variant; import java.util.ArrayList; import java.util.Collections; @@ -25,7 +26,7 @@ import java.util.TreeMap; * The single sample genotypers implementation of the genotype call, which contains * extra information for the various genotype outputs */ -public class SSGGenotypeCall implements GenotypeCall { +public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput { // our stored genotype locus private final String mRefBase; private final int mPloidy; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 84d19a30e..b36aa0739 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -3,10 +3,12 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import net.sf.samtools.SAMFileHeader; import 
net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.utils.genotype.GenotypeOutput; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.io.File; @@ -100,8 +102,10 @@ public class GeliAdapter implements GenotypeWriter { * @param locus the locus to add */ @Override - public void addGenotypeCall(GenotypeCall locus) { - // TODO: add code here + public void addGenotypeCall(GenotypeOutput locus) { + SSGGenotypeCall call = (SSGGenotypeCall)locus; + LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 3a253840c..223ea7831 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.utils.genotype.geli; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.GenotypeOutput; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall; @@ -40,7 +40,7 @@ public class GeliTextWriter implements GenotypeWriter { * * @param locus the locus to add */ - public void 
addGenotypeCall(GenotypeCall locus) { + public void addGenotypeCall(GenotypeOutput locus) { SSGGenotypeCall call = (SSGGenotypeCall)locus; mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java index a38a0db1a..6a1fed8cb 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFRecord.java @@ -204,7 +204,7 @@ public abstract class GLFRecord { short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f))); out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f))); out.writeUInt(((Long) offset).intValue()); - int write = ((new Long(readDepth).intValue() & 0xffffff) | this.minimumLikelihood & 0xff << 24); + long write = (long) ((long)(readDepth & 0xffffff) | (long)(this.minimumLikelihood & 0xff) << 24); out.writeUInt(write); out.writeUByte((short) rmsMapQ); } @@ -250,7 +250,7 @@ public abstract class GLFRecord { for (double d : vals) { if (d < min) min = d; } - return (min > 255) ? 
255 : (short)min; + return GLFRecord.toCappedShort(min); } public REF_BASE getRefBase() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index e38e39432..59118f8b0 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -4,10 +4,10 @@ import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.genotype.GenotypeOutput; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.calls.GenotypeCall; import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall; import java.io.DataOutputStream; @@ -108,11 +108,11 @@ public class GLFWriter implements GenotypeWriter { * @param locus */ @Override - public void addGenotypeCall(GenotypeCall locus) { + public void 
addGenotypeCall(GenotypeOutput locus) { SSGGenotypeCall call = (SSGGenotypeCall)locus; LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj); + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/variant/Variant.java b/java/src/org/broadinstitute/sting/utils/genotype/variant/Variant.java new file mode 100644 index 000000000..043782317 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/variant/Variant.java @@ -0,0 +1,63 @@ +package org.broadinstitute.sting.utils.genotype.variant; + +import org.broadinstitute.sting.utils.GenomeLoc; + +/** + * @author aaron + * + * Interface Variant + * + * This interface represents a variant + */ +public interface Variant { + // the types of variants we currently allow + public enum VARIANT_TYPE { + SNP, INDEL, DELETION, REFERENCE + } + + /** + * get the frequency of this variant + * + * @return VariantFrequency with the stored frequency + */ + public VariantFrequency getFrequency(); + + /** @return the VARIANT_TYPE of the current variant */ + public VARIANT_TYPE getType(); + + /** + * are we a SNP? If not we're an indel/deletion + * + * @return true if we're a SNP + */ + public boolean isSNP(); + + /** + * are we an insertion? + * + * @return true if we are, false otherwise + */ + public boolean isInsertion(); + + /** + * are we a deletion? 
+ * + * @return true if we are, false otherwise + */ + public boolean isDeletion(); + + /** + * get the base representation of this Variant + * @return a string, of ploidy + */ + public String toBases(); + + /** + * get the location that this Variant represents + * @return a GenomeLoc + */ + public GenomeLoc getLocation(); + + + +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/variant/VariantFrequency.java b/java/src/org/broadinstitute/sting/utils/genotype/variant/VariantFrequency.java new file mode 100644 index 000000000..044429801 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/variant/VariantFrequency.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.genotype.variant; + + +/** + * + * @author aaron + * + * Class VariantFrequency + * + * a class that represents the variant frequency, and could serve as a base for any other + * variant frequency information (e.g. is it pop gen, from chip, etc). + */ +public class VariantFrequency { + + private final double mFrequency; + + /** + * create a variant frequency + * @param frequency the frequency value to store; returned unchanged by getFrequency() + */ + public VariantFrequency(double frequency) { + this.mFrequency = frequency; + } + + public double getFrequency() { + return mFrequency; + } +}