From 9cd53d32731076ce9eb854d53509e03fd31555cd Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 30 Jul 2009 07:04:05 +0000 Subject: [PATCH] some initial changes from the first review of the genotype redesign, more to come. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1338 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/CoverageEvalWalker.java | 50 ++-- .../gatk/walkers/SingleSampleGenotyper.java | 64 +---- .../sting/playground/utils/AlleleMetrics.java | 22 +- .../playground/utils/GenotypeLikelihoods.java | 20 +- .../sting/utils/genotype/BasicGenotype.java | 158 ++++++++++++ .../sting/utils/genotype/Genotype.java | 126 +++------ .../sting/utils/genotype/GenotypeCall.java | 67 +++-- .../utils/genotype/GenotypeCallImpl.java | 225 ---------------- .../utils/genotype/GenotypeGenerator.java | 2 +- .../sting/utils/genotype/GenotypeLocus.java | 132 ---------- .../utils/genotype/GenotypeLocusImpl.java | 133 ---------- .../sting/utils/genotype/SSGGenotypeCall.java | 241 ++++++++++++++++++ .../confidence/BayesianConfidenceScore.java | 36 +++ .../{ => confidence}/ConfidenceScore.java | 44 ++-- .../utils/genotype/geli/GeliTextWriter.java | 43 ++-- .../sting/utils/genotype/glf/GLFWriter.java | 17 +- 16 files changed, 615 insertions(+), 765 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/SSGGenotypeCall.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java rename java/src/org/broadinstitute/sting/utils/genotype/{ => confidence}/ConfidenceScore.java (55%) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 18e4ce1b6..3f82cc9b8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.ListUtils; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall; import java.io.File; import java.io.FileNotFoundException; @@ -90,7 +91,7 @@ public class CoverageEvalWalker extends LocusWalker, String> { LocusContext subContext = new LocusContext(context.getLocation(), sub_reads, sub_offsets); GenotypeCall call = SSG.map(tracker, ref, subContext); - String callType = (call.isVariation()) ? ((call.getBestVrsRef().first.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; + String callType = (call.isVariation()) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; if (call != null) { GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } @@ -116,37 +117,26 @@ public class CoverageEvalWalker extends LocusWalker, String> { // a method to support getting the geli string, since the AlleleFrequencyEstimate is going away public String toGeliString (GenotypeCall locus) { - if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)"); - - - // this is to perserve the format string that we used to use - double[] likelihoods = new double[10]; - int index = 0; - List lt = locus.getLexigraphicallySortedGenotypes(); - for (Genotype G: lt) { - likelihoods[index] = G.getLikelihood(); - index++; - } - + SSGGenotypeCall call = (SSGGenotypeCall)locus; return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - locus.getLocation().getContig(), - locus.getLocation().getStart(), - locus.getReferencebase(), - locus.getReadDepth(), + call.getLocation().getContig(), + call.getLocation().getStart(), + call.getReferencebase(), + call.getReadDepth(), -1, - locus.getGenotypes().get(0).getBases(), - locus.getBestVrsRef().second.getScore(), - locus.getBestVrsNext().second.getScore(), - likelihoods[0], - likelihoods[1], - likelihoods[2], - likelihoods[3], - likelihoods[4], - likelihoods[5], - likelihoods[6], - likelihoods[7], - likelihoods[8], - likelihoods[9]); + call.getBases(), + call.getBestRef(), + call.getBestNext(), + call.getLikelihoods()[0], + call.getLikelihoods()[1], + call.getLikelihoods()[2], + call.getLikelihoods()[3], + call.getLikelihoods()[4], + call.getLikelihoods()[5], + call.getLikelihoods()[6], + call.getLikelihoods()[7], + call.getLikelihoods()[8], + call.getLikelihoods()[9]); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java index e630815d7..cab357269 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SingleSampleGenotyper.java @@ -10,19 +10,16 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.playground.utils.AlleleMetrics; import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods; -import org.broadinstitute.sting.playground.utils.IndelLikelihood; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.BasicPileup; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.genotype.*; import java.io.File; -import java.util.List; @ReadFilters(ZeroMappingQualityReadFilter.class) -public class SingleSampleGenotyper extends LocusWalker { +public class SingleSampleGenotyper extends LocusWalker { // Control output settings @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE; @Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr"); @@ -126,22 +123,21 @@ public class SingleSampleGenotyper extends LocusWalker reads, List offsets) { - String[] indels = BasicPileup.indelPileup(reads, offsets); - IndelLikelihood indelCall = new IndelLikelihood(indels, 1e-4); - - if (!indelCall.getType().equals("ref")) { - System.out.printf("INDEL %s %s\n", context.getLocation(), indelCall); - } - - return indelCall; - } - /** * Determine whether we're at a Hapmap site * @@ -271,10 +228,9 @@ public class SingleSampleGenotyper extends LocusWalker LOD_THRESHOLD) || - (call.getBestVrsRef().second.getScore() > LOD_THRESHOLD)) + if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) sum.addGenotypeCall(call); } return sum; diff --git a/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java b/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java index b769bda5c..b7af829a0 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java +++ b/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java @@ -5,9 +5,10 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.rodGFF; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.genotype.ConfidenceScore; +import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall; import java.io.File; import java.io.FileNotFoundException; @@ -56,8 +57,9 @@ public class AlleleMetrics { this.LOD_cutoff = lodThresold; } - public void nextPosition(GenotypeCall call, RefMetaDataTracker tracker) { - num_loci_total += 1; + public void nextPosition(GenotypeCall cl, RefMetaDataTracker tracker) { + SSGGenotypeCall call = (SSGGenotypeCall)cl; + num_loci_total += 1; boolean is_dbSNP_SNP = false; boolean has_hapmap_chip_genotype = false; @@ -80,10 +82,10 @@ public class AlleleMetrics { } } } - Pair result = call.getBestVrsRef(); - if (Math.abs(call.getBestVrsNext().second.getScore()) >= LOD_cutoff) { num_loci_confident += 1; } + double result = call.getBestRef(); + if (Math.abs(call.getBestNext()) >= LOD_cutoff) { num_loci_confident += 1; } - if (call.isVariation() && result.second.getScore() >= LOD_cutoff) + if (call.isVariation() && result >= LOD_cutoff) { // Confident variant. @@ -101,7 +103,7 @@ public class AlleleMetrics { String hapmap_genotype = hapmap_chip_genotype.getFeature(); long refs=0, alts=0; double hapmap_q; - String str = call.getBestVrsRef().first.getBases(); + String str = call.getBases(); char alt = str.charAt(0); if (str.charAt(0) == call.getReferencebase()) alt = str.charAt(1); for (char c : hapmap_genotype.toCharArray()) { @@ -121,8 +123,8 @@ public class AlleleMetrics { //out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt); //System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString()); - if (call.getBestVrsNext().second.getScore() >= LOD_cutoff) { - + if (call.getBestNext() >= LOD_cutoff) { + // TODO : should this all be commented out? /* System.out.printf("DBG %f %f %f %f\n", hapmap_q, @@ -143,7 +145,7 @@ public class AlleleMetrics { }*/ } - if (result.second.getScore() >= LOD_cutoff || -1 * result.second.getScore() >= LOD_cutoff) { + if (result >= LOD_cutoff || -1 * result >= LOD_cutoff) { // Now calculate ref / var performance - did we correctly classify the site as // reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here diff --git a/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java index 52d158958..bb2ae19a8 100644 --- a/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/playground/utils/GenotypeLikelihoods.java @@ -4,10 +4,13 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; import static java.lang.Math.log10; import static java.lang.Math.pow; import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; public class GenotypeLikelihoods implements GenotypeGenerator { // precalculate these for performance (pow/log10 is expensive!) @@ -410,7 +413,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator { * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values */ @Override - public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) { + public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) { //filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag @@ -437,14 +440,15 @@ public class GenotypeLikelihoods implements GenotypeGenerator { applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup()); // lets setup the locus - GenotypeLocus locus = new GenotypeLocusImpl(pileup.getLocation(), pileup.getReads().size(),Math.sqrt(squared/pileup.getReads().size())); + List lst = new ArrayList(); for (int x = 0; x < this.likelihoods.length; x++) { - try { - locus.addGenotype(new Genotype(this.genotypes[x],lklihoods[x],this.likelihoods[x])); - } catch (InvalidGenotypeException e) { - throw new StingException("Invalid Genotype value",e); - } + lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x]))); } - return locus; + return new SSGGenotypeCall(ref,2,pileup.getLocation(),lst,likelihoods,pileup); + } + //TODO: add this to the above code + boolean discovery = false; + public void setDiscovery(boolean isInDiscoveryMode) { + discovery = isInDiscoveryMode; } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java new file mode 100644 index 000000000..1623bf853 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -0,0 +1,158 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; + + +/** + * + * @author aaron + * + * Class BasicGenotype + * + * A basic implementation of the genotype interface + */ +public class BasicGenotype implements Genotype { + + // the bases that represent this genotype + private String mBases = ""; + + // the ploidy, assume 2 unless told otherwise + private int mPloidy = 2; + + // the confidence score + protected ConfidenceScore mConfidenceScore; + + // our location + private GenomeLoc mLoc; + + /** + * construct a genotypeLikelihood, given the bases, the confidence score, and the ploidy + * + * @param loc the location of the genotype + * @param bases the bases that make up this genotype + * @param ploidy the ploidy of this genotype + * @param score the confidence score + */ + public BasicGenotype(GenomeLoc loc, String bases, int ploidy, ConfidenceScore score) { + this.mPloidy = ploidy; + if (bases.length() != ploidy) { + throw new IllegalArgumentException("The number of bases should match the ploidy"); + } + this.mBases = bases; + this.mConfidenceScore = score; + this.mLoc = loc; + } + + /** + * construct a genotypeLikelihood, given the bases and the confidence score, and assume the + * ploidy is 2. + * + * @param loc the location of the genotype + * @param bases the bases that make up this genotype + * @param score the confidence score + */ + public BasicGenotype(GenomeLoc loc, String bases, ConfidenceScore score) { + if (bases.length() != mPloidy) { + throw new IllegalArgumentException("The number of bases should match the ploidy"); + } + this.mBases = bases; + this.mConfidenceScore = score; + this.mLoc = loc; + } + + /** + * get the confidence score + * @return get the confidence score that we're based on + */ + public ConfidenceScore getConfidenceScore() { + return this.mConfidenceScore; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + public String getBases() { + return mBases; + } + + /** + * get the ploidy + * @return the ploidy value + */ + public int getPloidy() { + return mPloidy; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + public boolean isHom() { + if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); + char first = mBases.charAt(0); + for (char c : mBases.toCharArray()) { + if (c != first) return false; + } + return true; + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + public boolean isHet() { + if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); + char first = mBases.charAt(0); + for (char c : mBases.toCharArray()) { + if (c != first) return true; + } + return false; + } + + /** + * get the genotype's location + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation() { + return mLoc; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + @Override + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base + * + * @return true if we're a variant + */ + @Override + public boolean isVariant(char ref) { + String ret = Utils.dupString(ref,this.getPloidy()); + return !this.getBases().equals(ret); + } + + /** + * return this genotype as a variant + * + * @return + */ + @Override + public Variant toVariant() { + return null; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index 4bc7ebe73..fd9141275 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -1,113 +1,71 @@ package org.broadinstitute.sting.utils.genotype; +import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; +import org.broadinstitute.sting.utils.GenomeLoc; /** * @author aaron *

* Class GenotypeLikelihood *

- * This class encompasses all the information that is associated with a genotype - * and it's likelihood, mainly: - *

- * Likelihood value + * This class emcompasses all the basic information about a genotype */ -public class Genotype { - private double mLikelihood = 0.0; - private double mPosteriorProb = 0.0; - private String mBases = ""; - private int mPloidy = 2; // assume diploid - +public interface Genotype { /** - * construct a genotypeLikelihood, given the bases, the posterior, and the likelihood - * - * @param bases the bases that make up this genotype - * @param posterior the posterior probability of this genotype - * @param likelihood the likelihood of this genotype - * @param ploidy the ploidy of this genotype + * get the confidence score + * @return get the confidence score that we're based on */ - public Genotype(String bases, double posterior, double likelihood, int ploidy) { - this.mPloidy = ploidy; - if (bases.length() != ploidy) { - throw new IllegalArgumentException("The number of bases should match the ploidy"); - } - this.mLikelihood = likelihood; - this.mBases = bases; - this.mPosteriorProb = posterior; - } - - /** - * construct a genotypeLikelihood, given the bases, the posterior, and the likelihood - * - * @param bases the bases that make up this genotype - * @param posterior the posterior probability of this genotype - * @param likelihood the likelihood of this genotype - */ - public Genotype(String bases, double posterior, double likelihood) { - if (bases.length() != mPloidy) { - throw new IllegalArgumentException("The number of bases should match the ploidy"); - } - this.mLikelihood = likelihood; - this.mBases = bases; - this.mPosteriorProb = posterior; - } - - /** - * get the likelihood value - * - * @return a double, representing the likelihood - */ - public double getLikelihood() { - return mLikelihood; - } - - /** - * get the posterior value - * - * @return a double, representing the posterior - */ - public double getPosteriorProb() { - return mPosteriorProb; - } + public ConfidenceScore getConfidenceScore(); /** * get the bases that represent this * - * @return + * @return the bases, as a string */ - public String getBases() { - return mBases; - } + public String getBases(); - public int getPloidy() { - return mPloidy; - } + /** + * get the ploidy + * @return the ploidy value + */ + public int getPloidy(); /** * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) * - * @return + * @return true if we're homozygous, false otherwise */ - public boolean isHom() { - if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); - char first = mBases.charAt(0); - for (char c: mBases.toCharArray()) { - if (c != first) return false; - } - return true; - } + public boolean isHom(); /** * Returns true if observed alleles differ (regardless of whether they are ref or alt) * + * @return true if we're het, false otherwise + */ + public boolean isHet(); + + /** + * get the genotype's location + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation(); + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * @return true is a SNP + */ + public boolean isPointGenotype(); + + /** + * given the reference, are we a variant? (non-ref) + * @param ref the reference base or bases + * @return true if we're a variant + */ + public boolean isVariant(char ref); + + /** + * return this genotype as a variant * @return */ - public boolean isHet() { - if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); - char first = mBases.charAt(0); - for (char c: mBases.toCharArray()) { - if (c != first) return true; - } - return false; - } - + public Variant toVariant(); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java index c98af4651..91c4d0ab5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java @@ -1,6 +1,9 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.Comparator; +import java.util.List; /** * @author aaron @@ -10,14 +13,7 @@ import org.broadinstitute.sting.utils.Pair; * Genotype call interface, for indicating that a genotype is * also a genotype call. */ -public interface GenotypeCall extends GenotypeLocus{ - /** - * get the confidence - * - * @return a ConfidenceScore representing the LOD score that this genotype was called with - */ - public ConfidenceScore getConfidence(); - +public interface GenotypeCall extends Genotype { /** * gets the reference base * @@ -26,27 +22,48 @@ public interface GenotypeCall extends GenotypeLocus{ public char getReferencebase(); /** - * get the best vrs the next best genotype LOD score - * @return the genotype, and a LOD for best - next - */ - public Pair getBestVrsNext(); - - /** - * get the best vrs the reference allele. - * @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked - * with is variation - */ - public Pair getBestVrsRef(); - - /** - * check to see if this call is a variant, i.e. not homozygous reference + * check to see if this called genotype is a variant, i.e. not homozygous reference * @return true if it's not hom ref, false otherwise */ public boolean isVariation(); /** - * return genotype locus, with our data + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object */ - public GenotypeLocus toGenotypeLocus(); + public GenomeLoc getLocation(); + + /** + * get the genotypes, sorted in asscending order by their ConfidenceScores (the best + * to the worst ConfidenceScores) + * + * @return a list of the genotypes, sorted by ConfidenceScores + */ + public List getGenotypes(); + + /** + * get the genotypes sorted lexigraphically + * + * @return a list of the genotypes sorted lexi + */ + public List getLexigraphicallySortedGenotypes(); + } +class LexigraphicalComparator implements Comparator { + private final Double EPSILON = 1.0e-15; + + @Override + public int compare(Genotype genotype, Genotype genotype1) { + return genotype.getBases().compareTo(genotype1.getBases()); + } +} + +class ConfidenceScoreSort implements Comparator { + @Override + public int compare(Genotype genotype, Genotype genotype1) { + return genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore()); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java deleted file mode 100644 index 4db9bcc3b..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCallImpl.java +++ /dev/null @@ -1,225 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.List; - - -/** - * @author aaron - *

- * Class GenotypeCallImpl - *

- * A descriptions should go here. Blame aaron if it's missing. - */ -public class GenotypeCallImpl implements GenotypeCall { - - // our stored genotype locus - private final GenotypeLocus mLocus; - private final char mRefBase; - private final ConfidenceScore mScore; - - /** - * generate a GenotypeCall object with the specified locus info and reference base - * - * @param mLocus the locus - * @param mRefBase the reference base to use - */ - public GenotypeCallImpl(GenotypeLocus mLocus, char mRefBase, ConfidenceScore mScore) { - if (mLocus.getGenotypes().size() < 1) throw new StingException("Genotype Locus is empty"); - this.mLocus = mLocus; - this.mRefBase = String.valueOf(mRefBase).toUpperCase().charAt(0); - this.mScore = mScore; - } - - /** - * get the confidence - * - * @return a ConfidenceScore representing the LOD score that this genotype was called with - */ - @Override - public ConfidenceScore getConfidence() { - return mScore; - } - - /** - * gets the reference base - * - * @return the reference base we represent - */ - @Override - public char getReferencebase() { - return mRefBase; - } - - /** - * get the best vrs the next best genotype LOD score - * - * @return the genotype, and a LOD for best - next - */ - @Override - public Pair getBestVrsNext() { - List genos = this.mLocus.getGenotypes(); - if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes"); - return new Pair(genos.get(0), - new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - genos.get(1).getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT)); - } - - /** - * get the best vrs the reference allele. - * - * @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked - * with is variation - */ - @Override - public Pair getBestVrsRef() { - List genos = this.mLocus.getGenotypes(); - - // find the reference allele - String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase(); - Genotype refGenotype = findRefGenotype(ref, genos); - if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes"); - return new Pair(genos.get(0), - new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - refGenotype.getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT)); - } - - /** - * get the reference genotype object - * - * @param ref the reference as a ploidy count homozygous string - * @param genos the genotype list - * - * @return a genotype for the - */ - private static Genotype findRefGenotype(String ref, List genos) { - Genotype refGenotype = null; - for (Genotype g : genos) { - if (g.getBases().equals(ref)) refGenotype = g; - } - if (refGenotype == null) { - for (Genotype g : genos) { - System.err.println(g.getBases()); - } - throw new StingException("Unable to find the reference genotype + " + ref + " size of genotype list = " + genos.size()); - } - return refGenotype; - } - - /** - * check to see if this call is a variant, i.e. not homozygous reference - * - * @return true if it's not hom ref, false otherwise - */ - @Override - public boolean isVariation() { - List genos = this.mLocus.getGenotypes(); - String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase(); - return !(genos.get(0).getBases().equals(ref)); - } - - /** return genotype locus, with our data */ - @Override - public GenotypeLocus toGenotypeLocus() { - return mLocus; - } - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - @Override - public GenomeLoc getLocation() { - return mLocus.getLocation(); - } - - /** - * get the ploidy at this locus - * - * @return an integer representing the genotype ploidy at this location - */ - @Override - public int getPloidy() { - return mLocus.getPloidy(); - } - - /** - * get the genotypes, sorted in asscending order by their likelihoods (the best - * to the worst likelihoods) - * - * @return a list of the likelihoods - */ - @Override - public List getGenotypes() { - return mLocus.getGenotypes(); - } - - /** - * get the genotypes and their posteriors - * - * @return a list of the poseriors - */ - @Override - public List getPosteriors() { - return mLocus.getPosteriors(); - } - - /** - * get the genotypes sorted lexigraphically - * - * @return a list of the genotypes sorted lexi - */ - @Override - public List getLexigraphicallySortedGenotypes() { - return mLocus.getLexigraphicallySortedGenotypes(); - } - - /** - * get the read depth at this position - * - * @return the read depth, -1 if it is unknown - */ - @Override - public int getReadDepth() { - return mLocus.getReadDepth(); - } - - /** - * add a genotype to the collection - * - * @param genotype - * - * @throws InvalidGenotypeException - */ - @Override - public void addGenotype(Genotype genotype) throws InvalidGenotypeException { - mLocus.addGenotype(genotype); - } - - /** - * get the root mean square (RMS) of the mapping qualities - * - * @return the RMS, or a value < 0 if it's not available - */ - @Override - public double getRMSMappingQuals() { - return mLocus.getRMSMappingQuals(); - } - - /** - * create a variant, given the reference, and a lod score - * - * @param refBase the reference base - * @param score the threshold to use to determine if it's a variant or not - * - * @return a variant object, or null if no genotypes meet the criteria - */ - @Override - public Variant toGenotypeCall(char refBase, ConfidenceScore score) { - return null; //To change body of implemented methods use File | Settings | File Templates. - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java index 3850f0634..d99a40a81 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeGenerator.java @@ -22,5 +22,5 @@ public interface GenotypeGenerator { * @param pileup a pileup of the reads, containing the reads and their offsets * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values */ - public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup); + public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java deleted file mode 100644 index 082461287..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocus.java +++ /dev/null @@ -1,132 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.List; -import java.util.Comparator; - -/** - * @author aaron - *

- * Interface Genotype - *

- * This interface represents the collection of genotypes at a specific locus - */ -public interface GenotypeLocus { - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - public GenomeLoc getLocation(); - - /** - * get the ploidy at this locus - * - * @return an integer representing the genotype ploidy at this location - */ - public int getPloidy(); - - /** - * get the genotypes, sorted in asscending order by their likelihoods (the best - * to the worst likelihoods) - * - * @return a list of the genotypes, sorted by likelihoods - */ - public List getGenotypes(); - - /** - * get the genotypes and their posteriors - * - * @return a list of the genotypes, sorted by poseriors - */ - public List getPosteriors(); - - /** - * get the genotypes sorted lexigraphically - * - * @return a list of the genotypes sorted lexi - */ - public List getLexigraphicallySortedGenotypes(); - - - /** - * get the read depth at this position - * - * @return the read depth, -1 if it is unknown - */ - public int getReadDepth(); - - /** - * add a genotype to the collection - * - * @param genotype - * - * @throws InvalidGenotypeException - */ - public void addGenotype(Genotype genotype) throws InvalidGenotypeException; - - /** - * get the root mean square (RMS) of the mapping qualities - * - * @return the RMS, or a value < 0 if it's not available - */ - public double getRMSMappingQuals(); - - /** - * create a variant, given the reference, and a lod score - * - * @param refBase the reference base - * @param score the threshold to use to determine if it's a variant or not - * - * @return a variant object, or null if no genotypes meet the criteria - */ - public Variant toGenotypeCall(char refBase, ConfidenceScore score); - -} - - -/** - * the following are helper Comparator classes for the above sort orders, that may be useful - * for anyone implementing the GenotypeLocus interface - */ -class PosteriorComparator implements Comparator { - private final Double EPSILON = 1.0e-15; - - @Override - public int compare(Genotype genotype, Genotype genotype1) { - double diff = genotype.getPosteriorProb() - genotype1.getPosteriorProb(); - if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getPosteriorProb()))) - return 0; - else if (diff < 0) - return 1; - else - return -1; // TODO: BACKWARD NOW - } -} - -class LexigraphicalComparator implements Comparator { - private final Double EPSILON = 1.0e-15; - - @Override - public int compare(Genotype genotype, Genotype genotype1) { - return genotype.getBases().compareTo(genotype1.getBases()); - } -} - -class LikelihoodComparator implements Comparator { - private final Double EPSILON = 1.0e-15; - - @Override - public int compare(Genotype genotype, Genotype genotype1) { - double diff = genotype.getLikelihood() - genotype1.getLikelihood(); - if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getLikelihood()))) - return 0; - else if (diff < 0) - return 1; // TODO: BACKWARD NOW - else - return -1; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java deleted file mode 100644 index eab555781..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusImpl.java +++ /dev/null @@ -1,133 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - - -/** - * @author aaron - *

- * Class GenotypeBucket - *

- * A descriptions should go here. Blame aaron if it's missing. - */ -public class GenotypeLocusImpl implements GenotypeLocus { - - private final List mGenotypes = new ArrayList(); - private GenomeLoc mLocation = null; - private int mReadDepth = -1; - private double mRMSMappingQual = -1; - - public GenotypeLocusImpl(GenomeLoc location, int readDepth, double rmsMappingQual) { - this.mLocation = location; - mReadDepth = readDepth; - mRMSMappingQual = rmsMappingQual; - } - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - @Override - public GenomeLoc getLocation() { - return mLocation; - } - - /** - * get the ploidy at this locus - * - * @return an integer representing the genotype ploidy at this location - */ - @Override - public int getPloidy() { - return 2; - } - - /** - * get the genotypes, sorted in asscending order by their likelihoods (the best - * to the worst likelihoods) - * - * @return a list of the likelihoods - */ - @Override - public List getGenotypes() { - Collections.sort(this.mGenotypes, new LikelihoodComparator()); - return this.mGenotypes; - } - - - - /** - * get the genotypes and their posteriors - * - * @return a list of the poseriors - */ - @Override - public List getPosteriors() { - Collections.sort(this.mGenotypes, new PosteriorComparator()); - return this.mGenotypes; - } - - /** - * get the genotypes sorted lexigraphically - * - * @return a list of the genotypes sorted lexi - */ - @Override - public List getLexigraphicallySortedGenotypes() { - Collections.sort(this.mGenotypes, new LexigraphicalComparator()); - return this.mGenotypes; - } - - /** - * get the read depth at this position - * - * @return the read depth, -1 if it is unknown - */ - @Override - public int getReadDepth() { - return mReadDepth; - } - - - - /** - * add a genotype to the collection - * - * @param genotype - * - * @throws InvalidGenotypeException - */ - @Override - public void addGenotype(Genotype genotype) throws InvalidGenotypeException { - this.mGenotypes.add(genotype); - } - - /** - * get the root mean square (RMS) of the mapping qualities - * - * @return the RMS, or a value < 0 if it's not available - */ - @Override - public double getRMSMappingQuals() { - return mRMSMappingQual; - } - - /** - * create a variant, given the reference, and a lod score - * - * @param refBase the reference base - * @param score the threshold to use to determine if it's a variant or not - * - * @return a variant object, or null if no genotypes meet the criteria - */ - @Override - public Variant toGenotypeCall(char refBase, ConfidenceScore score) { - return null; //To change body of implemented methods use File | Settings | File Templates. - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SSGGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/SSGGenotypeCall.java new file mode 100644 index 000000000..46cbe9075 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/SSGGenotypeCall.java @@ -0,0 +1,241 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; +import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; + +import java.util.*; + +import net.sf.samtools.SAMRecord; + + +/** + * @author aaron + *

+ * Class GenotypeCallImpl + *

+ * The single sample genotypers implementation of the genotype call, plus + * some extra booty for the genotype writers of this world + */ +public class SSGGenotypeCall implements GenotypeCall { + // TODO: make SSG into a more robust Genotype call interface + // our stored genotype locus + private final char mRefBase; + private final int mPloidy; + private final GenomeLoc mLoc; + private TreeMap mGenotypes = new TreeMap(); + private double mLikelihoods[]; + private double bestNext = 0; + private double bestRef = 0; + + private int readDepth; + private double rmsMapping; + + public SSGGenotypeCall(char mRefBase, int mPloidy, GenomeLoc mLoc, List genotypes, double likelihoods[], ReadBackedPileup pileup) { + this.mRefBase = mRefBase; + this.mPloidy = mPloidy; + this.mLoc = mLoc; + if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0"); + int index = 0; + String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase(); + double ref = 0.0; + double best = Double.NEGATIVE_INFINITY; // plus one + double next = Double.NEGATIVE_INFINITY; + for (Genotype g : genotypes) { + if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index]; + if (likelihoods[index] > best) { + next = best; + best = likelihoods[index]; + } else if (likelihoods[index] > next) next = likelihoods[index]; + index++; + } + bestNext = Math.abs(best - next); + bestRef = Math.abs(best - ref); + mLikelihoods = likelihoods; + index = 0; + for (Genotype g : genotypes) { + ((BasicGenotype)g).mConfidenceScore = new BayesianConfidenceScore(Math.abs(likelihoods[index] - ref)); + mGenotypes.put(likelihoods[index],g); + index++; + } + + this.readDepth = pileup.getReads().size(); + rmsMapping = Math.sqrt(calculateRMS(pileup) / readDepth); + } + + private double calculateRMS(ReadBackedPileup pileup) { + double rms = 0.0; + for (SAMRecord r : pileup.getReads()) { + rms += r.getMappingQuality() * r.getMappingQuality(); + } + return rms; + } + + /** + * gets the reference base + * + * @return the reference base we represent + */ + @Override + public char getReferencebase() { + return mRefBase; + } + + /** + * check to see if this called genotype is a variant, i.e. not homozygous reference + * + * @return true if it's not hom ref, false otherwise + */ + @Override + public boolean isVariation() { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(mRefBase); + } + + /** + * get the confidence score + * + * @return get the confidence score that we're based on + */ + @Override + public ConfidenceScore getConfidenceScore() { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).getConfidenceScore(); + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + @Override + public String getBases() { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).getBases(); + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + @Override + public int getPloidy() { + return this.mPloidy; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + @Override + public boolean isHom() { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHom(); + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + @Override + public boolean isHet() { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHet(); + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + @Override + public GenomeLoc getLocation() { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).getLocation(); + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + @Override + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * @return true if we're a variant + */ + @Override + public boolean isVariant(char ref) { + return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(ref); + } + + /** + * return this genotype as a variant + * + * @return + */ + @Override + public Variant toVariant() { + return null; //To change body of implemented methods use File | Settings | File Templates. + } + + /** + * get the genotypes, sorted in asscending order by their ConfidenceScores (the best + * to the worst ConfidenceScores) + * + * @return a list of the genotypes, sorted by ConfidenceScores + */ + @Override + public List getGenotypes() { + List newList = new ArrayList(); + newList.addAll(this.mGenotypes.values()); + return newList; + } + + /** + * get the genotypes sorted lexigraphically + * + * @return a list of the genotypes sorted lexi + */ + @Override + public List getLexigraphicallySortedGenotypes() { + List newList = new ArrayList(); + newList.addAll(this.mGenotypes.values()); + Collections.sort(newList, new LexigraphicalComparator()); + return newList; + } + + /** + * return the likelihoods as a double array, in lexographic order + * + * @return the likelihoods + */ + public double[] getLikelihoods() { + return this.mLikelihoods; + } + + public int getReadDepth() { + return readDepth; + } + + public double getRmsMapping() { + return rmsMapping; + } + + public double getBestNext() { + return bestNext; + } + + public double getBestRef() { + return bestRef; + } +} + + + \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java new file mode 100644 index 000000000..4930224ad --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.utils.genotype.confidence; + + +/** + * + * @author aaron + * + * Class LikelihoodConfidenceScore + * + * A descriptions should go here. Blame aaron if it's missing. + */ +public class BayesianConfidenceScore extends ConfidenceScore { + public BayesianConfidenceScore(double score) { + super(score); + } + + /** + * return the confidence method we're employing, UNKNOWN is an option + * + * @return the method of confidence we represent + */ + @Override + public SCORE_METHOD getConfidenceType() { + return SCORE_METHOD.LOD_SCORE; + } + + /** + * get the confidence score, normalized to the range of [0-1] + * + * @return a confidence score + */ + @Override + public double normalizedConfidenceScore() { + return Math.pow(10,this.getScore()); + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java similarity index 55% rename from java/src/org/broadinstitute/sting/utils/genotype/ConfidenceScore.java rename to java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java index 33f1ee95b..f3fe2f199 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceScore.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java @@ -1,5 +1,4 @@ -package org.broadinstitute.sting.utils.genotype; - +package org.broadinstitute.sting.utils.genotype.confidence; /** * @author aaron @@ -8,32 +7,19 @@ package org.broadinstitute.sting.utils.genotype; *

* this class represents the confidence in a genotype, and the method we used to obtain it */ -public class ConfidenceScore implements Comparable { +public abstract class ConfidenceScore implements Comparable { + // the general error of a floating point value private static final Double EPSILON = 1.0e-15; public enum SCORE_METHOD { - BEST_NEXT, BEST_REF, OTHER; + LOD_SCORE, UNKNOWN; } private Double mScore; - private SCORE_METHOD mMethod; - public ConfidenceScore(double score, SCORE_METHOD method) { + public ConfidenceScore(double score) { this.mScore = score; - this.mMethod = method; - } - - /** - * generate a confidence score, given the two likelihoods, and the method used - * - * @param likelihoodOne the first likelihood - * @param likelihoodTwo the second likelihood - * @param method the method used to determine the likelihood - */ - public ConfidenceScore(double likelihoodOne, double likelihoodTwo, SCORE_METHOD method) { - this.mScore = likelihoodOne / likelihoodTwo; - this.mMethod = method; } /** @@ -43,7 +29,7 @@ public class ConfidenceScore implements Comparable { */ @Override public int compareTo(ConfidenceScore o) { - if (o.mMethod != this.mMethod) { + if (o.getConfidenceType() != this.getConfidenceType()) { throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods"); } double diff = mScore - o.mScore; @@ -55,11 +41,23 @@ public class ConfidenceScore implements Comparable { return 1; } + /** + * get the score + * @return a double representing the genotype score + */ public Double getScore() { return mScore; } - public SCORE_METHOD getMethod() { - return mMethod; - } + /** + * return the confidence method we're employing, UNKNOWN is an option + * @return the method of confidence we represent + */ + public abstract SCORE_METHOD getConfidenceType(); + + /** + * get the confidence score, normalized to the range of [0-1] + * @return a confidence score + */ + public abstract double normalizedConfidenceScore(); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 643607594..a146a9148 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -1,14 +1,13 @@ package org.broadinstitute.sting.utils.genotype.geli; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.GenotypeCall; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintWriter; -import java.util.List; /** @@ -42,37 +41,27 @@ public class GeliTextWriter implements GenotypeWriter { * @param locus the locus to add */ public void addGenotypeCall(GenotypeCall locus) { - if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)"); - - - // this is to perserve the format string that we used to use - double[] likelihoods = new double[10]; - int index = 0; - List lt = locus.getLexigraphicallySortedGenotypes(); - for (Genotype G: lt) { - likelihoods[index] = G.getLikelihood(); - index++; - } + SSGGenotypeCall call = (SSGGenotypeCall)locus; mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", locus.getLocation().getContig(), locus.getLocation().getStart(), locus.getReferencebase(), - locus.getReadDepth(), + call.getReadDepth(), -1, - locus.getGenotypes().get(0).getBases(), - locus.getBestVrsRef().second.getScore(), - locus.getBestVrsNext().second.getScore(), - likelihoods[0], - likelihoods[1], - likelihoods[2], - likelihoods[3], - likelihoods[4], - likelihoods[5], - likelihoods[6], - likelihoods[7], - likelihoods[8], - likelihoods[9])); + locus.getBases(), + call.getConfidenceScore().getScore(), + locus.getConfidenceScore().getScore(), + call.getLikelihoods()[0], + call.getLikelihoods()[1], + call.getLikelihoods()[2], + call.getLikelihoods()[3], + call.getLikelihoods()[4], + call.getLikelihoods()[5], + call.getLikelihoods()[6], + call.getLikelihoods()[7], + call.getLikelihoods()[8], + call.getLikelihoods()[9])); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index e58a75f79..96acb6569 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -8,7 +8,6 @@ import org.broadinstitute.sting.utils.genotype.*; import java.io.DataOutputStream; import java.io.File; -import java.util.List; /* * Copyright (c) 2009 The Broad Institute * @@ -106,18 +105,10 @@ public class GLFWriter implements GenotypeWriter { */ @Override public void addGenotypeCall(GenotypeCall locus) { - //TODO: CODEWORD - // this is to perserve the format string that we used to use - double[] posteriors = new double[10]; - int index = 0; - List lt = locus.getLexigraphicallySortedGenotypes(); - for (Genotype G: lt) { - posteriors[index] = G.getLikelihood(); - index++; - } - - LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRMSMappingQuals(),locus.getReferencebase(),locus.getReadDepth(),obj); + SSGGenotypeCall call = (SSGGenotypeCall)locus; + LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + // TODO: fix me aaron + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj); } /**