diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java deleted file mode 100755 index 8afba67de..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java +++ /dev/null @@ -1,359 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.BasicGenotype; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; - -import static java.lang.Math.log10; -import static java.lang.Math.pow; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -public class OldAndBustedGenotypeLikelihoods { - protected static final double[] oneMinusData = new double[Byte.MAX_VALUE]; - protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE]; - protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE]; - //protected static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE]; - protected static final double log10Of1_3 = log10(1.0 / 3.0); - private boolean filterQ0Bases = true; - - static { - for (int qual = 0; qual < Byte.MAX_VALUE; qual++) { - double e = pow(10, (qual / -10.0)); - oneMinusData[qual] = log10(1.0 - e); - oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0); - oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0); - } - } - - private static double getOneMinusQual(final byte qual) { - return oneMinusData[qual]; - } - - private double getOneHalfMinusQual(final byte qual) { - return oneHalfMinusData[qual]; - } - - //public double[] likelihoods; - public int coverage; - - // The genotype priors; - private double priorHomRef; - private double priorHet; - private double priorHomVar; - private double[] oneHalfMinusData; - public double[] likelihoods; - - public boolean isThreeStateErrors() { - return threeStateErrors; - } - - public void setThreeStateErrors(boolean threeStateErrors) { - this.threeStateErrors = threeStateErrors; - this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne; - } - - private boolean threeStateErrors = false; - private boolean discoveryMode = false; - - public static double[] computePriors(double h) { - double[] pdbls = new double[3]; - pdbls[0] = 1.0 - (3.0 * h / 2.0); - pdbls[1] = h; - pdbls[2] = h / 2.0; - return pdbls; - } - - public final static String[] genotypes = new String[10]; - static { - genotypes[0] = "AA"; - genotypes[1] = "AC"; - genotypes[2] = "AG"; - genotypes[3] = "AT"; - genotypes[4] = "CC"; - genotypes[5] = "CG"; - genotypes[6] = "CT"; - genotypes[7] = "GG"; - genotypes[8] = "GT"; - genotypes[9] = "TT"; - } - - /** - * set the mode to discovery - * @param isInDiscoveryMode - */ - public void setDiscovery(boolean isInDiscoveryMode) { - discoveryMode = isInDiscoveryMode; - } - // Store the 2nd-best base priors for on-genotype primary bases - private HashMap onNextBestBasePriors = new HashMap(); - - // Store the 2nd-best base priors for off-genotype primary bases - private HashMap offNextBestBasePriors = new HashMap(); - - public OldAndBustedGenotypeLikelihoods(double heterozygosity) { - double[] vals = computePriors(heterozygosity); - initialize(vals[0], vals[1], vals[2]); - } - - public OldAndBustedGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) { - initialize(priorHomRef, priorHet, priorHomVar); - } - - /** - * Are we ignoring Q0 bases during calculations? - * @return - */ - public boolean isFilteringQ0Bases() { - return filterQ0Bases; - } - - /** - * Enable / disable filtering of Q0 bases. Enabled by default - * - * @param filterQ0Bases - */ - public void filterQ0Bases(boolean filterQ0Bases) { - this.filterQ0Bases = filterQ0Bases; - } - - private void initialize(double priorHomRef, double priorHet, double priorHomVar) { - this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne; - - this.priorHomRef = priorHomRef; - this.priorHet = priorHet; - this.priorHomVar = priorHomVar; - - likelihoods = new double[10]; - - coverage = 0; - - for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); } - } - - public double getHomRefPrior() { - return priorHomRef; - } - - public void setHomRefPrior(double priorHomRef) { - this.priorHomRef = priorHomRef; - } - - public double getHetPrior() { - return priorHet; - } - - public void setHetPrior(double priorHet) { - this.priorHet = priorHet; - } - - public double getHomVarPrior() { - return priorHomVar; - } - - public void setHomVarPrior(double priorHomVar) { - this.priorHomVar = priorHomVar; - } - - public int add(char ref, char read, byte qual) - { - if (qual <= 0) { - if ( isFilteringQ0Bases() ) { - return 0; - } else { - qual = 1; - } - } - - if (coverage == 0) - { - for (int i = 0; i < likelihoods.length; i++) - { - likelihoods[i] = 0; - } - } - //System.out.printf("%s %s%n", OldAndBustedGenotypeLikelihoods.class, this.toString('A')); - - for (int i = 0; i < genotypes.length; i++) - { - double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual); - System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]); - likelihoods[i] += likelihood; - coverage += 1; - } - - //System.out.printf("%s %s%n", OldAndBustedGenotypeLikelihoods.class, this.toString('A')); - - return 1; - } - - private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) { - if (qual == 0) { - // zero quals are wrong - throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual)); - } - - char h1 = genotype.charAt(0); - char h2 = genotype.charAt(1); - - double p_base; - - if ((h1 == h2) && (h1 == read)) { - // hom - p_base = getOneMinusQual(qual); - } else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) { - // het - p_base = getOneHalfMinusQual(qual); - } else if ( threeStateErrors ) { - // error - //System.out.printf("%s %c %c %b %f %f%n", genotype, h1, h2, h1 != h2, log10Of2_3, log10Of1_3 ); - p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 ); - } else { - // error - p_base = qual / -10.0; - } - - return p_base; - } - - public String[] sorted_genotypes; - public double[] sorted_likelihoods; - - public void sort() { - Integer[] permutation = Utils.SortPermutation(likelihoods); - - Integer[] reverse_permutation = new Integer[permutation.length]; - for (int i = 0; i < reverse_permutation.length; i++) { - reverse_permutation[i] = permutation[(permutation.length - 1) - i]; - } - - sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation); - sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation); - } - - public String toString(char ref) { - this.sort(); - double sum = 0; - String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref)); - for (int i = 0; i < sorted_genotypes.length; i++) { - if (i != 0) { - s = s + " "; - } - s = s + sorted_genotypes[i] + ":" + String.format("%.10f", sorted_likelihoods[i]); - sum += Math.pow(10,sorted_likelihoods[i]); - } - s = s + String.format(" %f", sum); - return s; - } - - public void applyPrior(char ref, double[] allele_likelihoods) { - int k = 0; - for (int i = 0; i < 4; i++) { - for (int j = i; j < 4; j++) { - if (i == j) { - this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]); - } else { - this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2); - } - k++; - } - } - this.sort(); - } - - public void applyPrior(char ref) { - for (int i = 0; i < genotypes.length; i++) { - if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { - // hom-ref - likelihoods[i] += Math.log10(priorHomRef); - } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) { - // hom-nonref - likelihoods[i] += Math.log10(priorHomVar); - } else { - // het - likelihoods[i] += Math.log10(priorHet); - } - if (Double.isInfinite(likelihoods[i])) { - likelihoods[i] = -1000; - } - } - this.sort(); - } - - public double LodVsNextBest() { - this.sort(); - return sorted_likelihoods[0] - sorted_likelihoods[1]; - } - - double ref_likelihood = Double.NaN; - - public double LodVsRef(char ref) { - if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) { - ref_likelihood = sorted_likelihoods[0]; - return (-1.0 * this.LodVsNextBest()); - } else { - for (int i = 0; i < genotypes.length; i++) { - if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { - ref_likelihood = likelihoods[i]; - } - } - } - return sorted_likelihoods[0] - ref_likelihood; - } - - public String BestGenotype() { - this.sort(); - return this.sorted_genotypes[0]; - } - - public double BestPosterior() { - this.sort(); - return this.sorted_likelihoods[0]; - } - - public double RefPosterior(char ref) { - this.LodVsRef(ref); - return this.ref_likelihood; - } - - /** - * given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs - * - * @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information - * @param ref the reference base - * @param pileup a pileup of the reads, containing the reads and their offsets - * - * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values - */ - public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) { - //filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag - - // for calculating the rms of the mapping qualities - double squared = 0.0; - for (int i = 0; i < pileup.getReads().size(); i++) { - SAMRecord read = pileup.getReads().get(i); - squared += read.getMappingQuality() * read.getMappingQuality(); - int offset = pileup.getOffsets().get(i); - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - add(ref, base, qual); - } - // save off the likelihoods - if (likelihoods == null || likelihoods.length == 0) return null; - - // Apply the two calculations - applyPrior(ref); - - // lets setup the locus - List lst = new ArrayList(); - for (int x = 0; x < this.likelihoods.length; x++) { - lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x]))); - } - return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java deleted file mode 100644 index c71c66385..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java +++ /dev/null @@ -1,302 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; -import org.broadinstitute.sting.utils.genotype.*; - -import java.util.*; - - -/** - * @author aaron - *

- * Class GenotypeCallImpl - *

- * The single sample genotypers implementation of the genotype call, which contains - * extra information for the various genotype outputs - */ -public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput { - // our stored genotype locus - private final String mRefBase; - private final int mPloidy; - private TreeMap mGenotypes = new TreeMap(); - private double mLikelihoods[]; - private double bestNext = 0; - private double bestRef = 0; - - private int readDepth = 0; - private double rmsMapping = 0; - - /** - * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object - * - * @param discoveryMode are we in discovery mode or genotyping mode - * @param mRefBase the ref base - * @param mPloidy the ploidy - * @param genotypes the genotype - * @param likelihoods the likelihood - * @param pileup the pile-up of reads at the specified locus - */ - public SSGGenotypeCall(boolean discoveryMode, char mRefBase, int mPloidy, List genotypes, double likelihoods[], ReadBackedPileup pileup) { - this.mRefBase = String.valueOf(mRefBase).toUpperCase(); - this.mPloidy = mPloidy; - if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0"); - String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase(); - calculateBestNext(discoveryMode, genotypes, likelihoods, refStr); - - this.readDepth = pileup.getReads().size(); - rmsMapping = calculateRMS(pileup); - } - - - /** - * calculate the best and next best, and update the stored confidence scores based on these values - * - * @param discoveryMode are we in discovery mode? - * @param genotypes the genotypes - * @param likelihoods the likelihoods corresponding to the genotypes - * @param refStr the reference string, i.e. reb base[ploidy] - */ - private void calculateBestNext(boolean discoveryMode, List genotypes, double[] likelihoods, String refStr) { - int index = 0; - double ref = 0.0; - double best = Double.NEGATIVE_INFINITY; - double next = Double.NEGATIVE_INFINITY; - for (Genotype g : genotypes) { - if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index]; - if (likelihoods[index] > best) { - next = best; - best = likelihoods[index]; - } else if (likelihoods[index] > next) next = likelihoods[index]; - index++; - } - bestNext = Math.abs(best - next); - bestRef = Math.abs(best - ref); - mLikelihoods = likelihoods; - index = 0; - - // reset the confidence based on either the discovery mode or the genotype mode - for (Genotype g : genotypes) { - double val = (discoveryMode) ? Math.abs(likelihoods[index] - ref) : Math.abs(likelihoods[index] - next); - ((BasicGenotype) g).setConfidenceScore(new BayesianConfidenceScore(val)); - mGenotypes.put(likelihoods[index], g); - index++; - } - } - - @Override - public boolean equals(Object other) { - if(other == null) - return false; - if(other instanceof SSGGenotypeCall) { - SSGGenotypeCall otherCall = (SSGGenotypeCall)other; - - boolean likelihoodsMatch = true; - for ( int i = 0; i < mLikelihoods.length; i++ ) { - likelihoodsMatch = likelihoodsMatch && MathUtils.compareDoubles(mLikelihoods[i], otherCall.mLikelihoods[i]) == 0; - } - //System.out.printf("likelihoodsMatch = %b%n", likelihoodsMatch); - - return this.mRefBase.equals(otherCall.mRefBase) && - this.mPloidy == otherCall.mPloidy && - MathUtils.compareDoubles(this.bestNext, otherCall.bestNext) == 0 && - MathUtils.compareDoubles(this.bestRef, otherCall.bestRef) == 0 && - this.readDepth == otherCall.readDepth && - MathUtils.compareDoubles(this.rmsMapping, otherCall.rmsMapping) == 0 && - likelihoodsMatch; - } - return false; - } - - public String toString() { - return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s", - getLocation(), mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods)); - } - - /** - * calculate the rms , given the read pileup - * - * @param pileup - * - * @return - */ - private double calculateRMS(ReadBackedPileup pileup) { - List reads = pileup.getReads(); - int[] qualities = new int[reads.size()]; - for (int i=0; i < reads.size(); i++) - qualities[i] = reads.get(i).getMappingQuality(); - return MathUtils.rms(qualities); - } - - /** - * gets the reference base - * - * @return the reference base we represent - */ - @Override - public char getReferencebase() { - return mRefBase.charAt(0); - } - - /** - * check to see if this called genotype is a variant, i.e. not homozygous reference - * - * @return true if it's not hom ref, false otherwise - */ - @Override - public boolean isVariation() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(getReferencebase()); - } - - /** - * get the confidence score - * - * @return get the confidence score that we're based on - */ - @Override - public ConfidenceScore getConfidenceScore() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).getConfidenceScore(); - } - - /** - * get the bases that represent this - * - * @return the bases, as a string - */ - @Override - public String getBases() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).getBases(); - } - - /** - * get the ploidy - * - * @return the ploidy value - */ - @Override - public int getPloidy() { - return this.mPloidy; - } - - /** - * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) - * - * @return true if we're homozygous, false otherwise - */ - @Override - public boolean isHom() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHom(); - } - - /** - * Returns true if observed alleles differ (regardless of whether they are ref or alt) - * - * @return true if we're het, false otherwise - */ - @Override - public boolean isHet() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHet(); - } - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - @Override - public GenomeLoc getLocation() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).getLocation(); - } - - /** - * returns true if the genotype is a point genotype, false if it's a indel / deletion - * - * @return true is a SNP - */ - @Override - public boolean isPointGenotype() { - return true; - } - - /** - * given the reference, are we a variant? (non-ref) - * - * @param ref the reference base or bases - * - * @return true if we're a variant - */ - @Override - public boolean isVariant(char ref) { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(ref); - } - - /** - * return this genotype as a variant - * - * @return - */ - @Override - public Variant toVariant() { - return null; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * get the genotypes, sorted in asscending order by their ConfidenceScores (the best - * to the worst ConfidenceScores) - * - * @return a list of the genotypes, sorted by ConfidenceScores - */ - @Override - public List getGenotypes() { - List newList = new ArrayList(); - newList.addAll(this.mGenotypes.values()); - return newList; - } - - /** - * get the genotypes sorted lexigraphically - * - * @return a list of the genotypes sorted lexi - */ - @Override - public List getLexigraphicallySortedGenotypes() { - List newList = new ArrayList(); - newList.addAll(this.mGenotypes.values()); - Collections.sort(newList, new LexigraphicalComparator()); - return newList; - } - - /** - * return the likelihoods as a double array, in lexographic order - * - * @return the likelihoods - */ - public double[] getLikelihoods() { - return this.mLikelihoods; - } - - public int getReadDepth() { - return readDepth; - } - - public double getRmsMapping() { - return rmsMapping; - } - - public double getBestNext() { - return bestNext; - } - - public double getBestRef() { - return bestRef; - } -} - - - \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java new file mode 100644 index 000000000..8aa33f7d4 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java @@ -0,0 +1,279 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; + +import java.util.Arrays; +import java.util.List; + + +/** + * @author aaron + *

+ * Class SSGenotypeCall + *

+ * The single sample implementation of the genotype interface, which contains + * extra information for the various genotype outputs + */ +public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked { + private final char mRefBase; + private final GenotypeLikelihoods mGenotypeLikelihoods; + + // the next two values are filled in on-demand. Default to -1 since they can never be negitive + private final GenomeLoc mLocation; + private final ReadBackedPileup mPileup; + + // if this is null, we were constructed with the intention that we'd represent the best genotype + private DiploidGenotype mGenotype = null; + + // which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best + private DiploidGenotype mCompareTo = null; + + // are we best vrs ref or best vrs next - for internal consumption only + private final boolean mBestVrsRef; + + /** + * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object + * + * @param discovery are we representing the best vrs next or best vrs ref + * @param location the location we're working with + * @param refBase the ref base + * @param gtlh the genotype likelihoods object + * @param pileup the pile-up of reads at the specified locus + */ + public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { + mBestVrsRef = discovery; + mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case + mGenotypeLikelihoods = gtlh; + mLocation = location; + mPileup = pileup; + } + + /** + * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object + * + * @param discovery are we representing the best vrs next or best vrs ref + * @param location the location we're working with + * @param refBase the ref base + * @param gtlh the genotype likelihoods object + * @param pileup the pile-up of reads at the specified locus + */ + SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { + mBestVrsRef = discovery; + mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case + mGenotypeLikelihoods = gtlh; + mLocation = location; + mGenotype = genotype; + mPileup = pileup; + } + + @Override + public boolean equals(Object other) { + if (other == null) + return false; + if (other instanceof SSGenotypeCall) { + SSGenotypeCall otherCall = (SSGenotypeCall) other; + + if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) { + return false; + } + if (!this.mGenotype.equals(otherCall.mGenotype)) + return false; + return (this.mRefBase == otherCall.mRefBase) && + this.mPileup.equals(mPileup); + } + return false; + } + + public String toString() { + return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f likelihoods=%s", + getLocation(), mRefBase, mPileup.getReads().size(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods())); + } + + + + + /** + * get the confidence we have + * + * @return get the one minus error value + */ + @Override + public double getLog10PError() { + getBestGenotype(); + getAltGenotype(); + return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo)); + } + + /** + * get the best genotype + */ + public DiploidGenotype getBestGenotype() { + if (mGenotype == null) { + Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); + mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + } + return mGenotype; + } + + /** + * get the alternate genotype + */ + public DiploidGenotype getAltGenotype() { + if (mCompareTo == null) { + if (this.mBestVrsRef) { + mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2)); + } else { + Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); + mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + } + } + return mCompareTo; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + @Override + public String getBases() { + return getBestGenotype().toString(); + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + @Override + public int getPloidy() { + return 2; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + @Override + public boolean isHom() { + return getBestGenotype().isHom(); + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + @Override + public boolean isHet() { + return !isHom(); + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + @Override + public GenomeLoc getLocation() { + return this.mLocation; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + @Override + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * + * @return true if we're a variant + */ + @Override + public boolean isVariant(char ref) { + return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString()); + } + + /** + * return this genotype as a variant + * + * @return + */ + public Variant toVariation() { + return null; // the next step is to implement the variant system + } + + /** + * return the likelihoods as a double array, in lexographic order + * + * @return the likelihoods + */ + public double[] getProbabilities() { + return this.mGenotypeLikelihoods.getPosteriors(); + } + + /** + * get the SAM records that back this genotype call + * + * @return a list of SAM records + */ + public List getReads() { + return this.mPileup.getReads(); + } + + /** + * get the count of reads + * + * @return the number of reads we're backed by + */ + public int getReadCount() { + return this.mPileup.getReads().size(); + } + + /** + * get the reference character + * + * @return + */ + public char getReference() { + return this.mRefBase; + } + + /** + * get the likelihoods + * + * @return an array in lexigraphical order of the likelihoods + */ + public Genotype getGenotype(DiploidGenotype x) { + return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x); + } + + /** + * get the likelihood information for this + * + * @return + */ + public double[] getLikelihoods() { + // TODO: this is wrong, obviously, but we've kept it for now to stay backward compatible with previous calls + return this.mGenotypeLikelihoods.getPosteriors(); + } + + +} + + + \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java index 0d42670a1..dd95c92a4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java @@ -5,23 +5,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.BasicGenotype; -import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; import java.io.File; -import java.util.List; -import java.util.ArrayList; @ReadFilters(ZeroMappingQualityReadFilter.class) -public class SingleSampleGenotyper extends LocusWalker { +public class SingleSampleGenotyper extends LocusWalker { // Control output settings @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false) public File VARIANTS_FILE = null; @@ -74,7 +69,7 @@ public class SingleSampleGenotyper extends LocusWalker lst = new ArrayList(); - for ( DiploidGenotype g : DiploidGenotype.values() ) { - lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(gl.getLikelihood(g)))); - } - - //System.out.printf("At %s%n", new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, g.getPosteriors(), pileup)); - return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, gl.getPosteriors(), pileup); + return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup); } /** @@ -138,11 +126,9 @@ public class SingleSampleGenotyper extends LocusWalker LOD_THRESHOLD) { - //System.out.printf("Call %s%n", call); + public GenotypeWriter reduce(SSGenotypeCall call, GenotypeWriter sum) { + if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) { + if (call.getLog10PError() >= LOD_THRESHOLD) { sum.addGenotypeCall(call); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 58e73d513..046057a84 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -6,14 +6,17 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodGFF; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.ListUtils; -import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.*; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -77,9 +80,9 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - GenotypeCall call = SSG.map(tracker, ref, subContext); + SSGenotypeCall call = SSG.map(tracker, ref, subContext); - String callType = (call.isVariation()) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; + String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; if (call != null) { GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } @@ -104,27 +107,52 @@ public class CoverageEvalWalker extends LocusWalker, String> { } // a method to support getting the geli string, since the AlleleFrequencyEstimate is going away - public String toGeliString (GenotypeCall locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; - return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - call.getLocation().getContig(), - call.getLocation().getStart(), - call.getReferencebase(), - call.getReadDepth(), - -1, - call.getBases(), - call.getBestRef(), - call.getBestNext(), - call.getLikelihoods()[0], - call.getLikelihoods()[1], - call.getLikelihoods()[2], - call.getLikelihoods()[3], - call.getLikelihoods()[4], - call.getLikelihoods()[5], - call.getLikelihoods()[6], - call.getLikelihoods()[7], - call.getLikelihoods()[8], - call.getLikelihoods()[9]); + public String toGeliString (Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; + + char ref = locus.getReference(); + + if (locus instanceof ReadBacked) { + readDepth = ((ReadBacked)locus).getReadCount(); + } + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, 0.0); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + double[] lks; + lks = Arrays.copyOf(likelihoods,likelihoods.length); + Arrays.sort(lks); + nextVrsBest = lks[9] - lks[8]; + if (ref != 'X') { + int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); + nextVrsRef = lks[9] - likelihoods[index]; + } + } + // we have to calcuate our own + + return new String(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + locus.getLocation().getContig(), + locus.getLocation().getStart(), + ref, + readDepth, + -1, + locus.getBases(), + nextVrsRef, + nextVrsBest, + likelihoods[0], + likelihoods[1], + likelihoods[2], + likelihoods[3], + likelihoods[4], + likelihoods[5], + likelihoods[6], + likelihoods[7], + likelihoods[8], + likelihoods[9])); } } diff --git a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java index bca475a1d..13b8854d7 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java +++ b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java @@ -1,21 +1,19 @@ package org.broadinstitute.sting.playground.utils; -import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import java.io.PrintWriter; -import java.util.Set; -import java.util.List; -import java.util.LinkedList; import java.util.ArrayList; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileWriter; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; /** * Created by IntelliJ IDEA. @@ -252,11 +250,11 @@ public class ArtificialPoolContext { } public String genotypeAndConfidenceToString(int group, String spacer) { - GenotypeCall call = this.getGenotypeCall(group); - return (call.getGenotypes() + spacer + call.getConfidenceScore().toString()); + Genotype call = this.getGenotype(group); + return (call.getBases() + spacer + call.getLog10PError()); // TODO: fix me } - public GenotypeCall getGenotypeCall(int group) { + public Genotype getGenotype(int group) { AlignmentContext alicon = this.getAlignmentContext(); Pair[],List[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets()); return ssg.map(this.getRefMetaDataTracker(),this.getReferenceContext(), diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java deleted file mode 100644 index 149ad8843..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ /dev/null @@ -1,169 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; - - -/** - * @author aaron - *

- * Class BasicGenotype - *

- * A basic implementation of the genotype interface - */ -public class BasicGenotype implements Genotype { - - // the bases that represent this genotype - private String mBases = ""; - - // the ploidy, assume 2 unless told otherwise - private int mPloidy = 2; - - // the confidence score - protected ConfidenceScore mConfidenceScore; - - // our location - private GenomeLoc mLoc; - - /** - * construct a genotypeLikelihood, given the bases, the confidence score, and the ploidy - * - * @param loc the location of the genotype - * @param bases the bases that make up this genotype - * @param ploidy the ploidy of this genotype - * @param score the confidence score - */ - public BasicGenotype(GenomeLoc loc, String bases, int ploidy, ConfidenceScore score) { - this.mPloidy = ploidy; - if (bases.length() != ploidy) { - throw new IllegalArgumentException("The number of bases should match the ploidy"); - } - this.mBases = bases; - this.mConfidenceScore = score; - this.mLoc = loc; - } - - /** - * construct a genotypeLikelihood, given the bases and the confidence score, and assume the - * ploidy is 2. - * - * @param loc the location of the genotype - * @param bases the bases that make up this genotype - * @param score the confidence score - */ - public BasicGenotype(GenomeLoc loc, String bases, ConfidenceScore score) { - if (bases.length() != mPloidy) { - throw new IllegalArgumentException("The number of bases should match the ploidy"); - } - this.mBases = bases; - this.mConfidenceScore = score; - this.mLoc = loc; - } - - /** - * get the confidence score - * - * @return get the confidence score that we're based on - */ - public ConfidenceScore getConfidenceScore() { - return this.mConfidenceScore; - } - - /** - * get the bases that represent this - * - * @return the bases, as a string - */ - public String getBases() { - return mBases; - } - - /** - * get the ploidy - * - * @return the ploidy value - */ - public int getPloidy() { - return mPloidy; - } - - /** - * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) - * - * @return true if we're homozygous, false otherwise - */ - public boolean isHom() { - if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); - char first = mBases.charAt(0); - for (char c : mBases.toCharArray()) { - if (c != first) return false; - } - return true; - } - - /** - * Returns true if observed alleles differ (regardless of whether they are ref or alt) - * - * @return true if we're het, false otherwise - */ - public boolean isHet() { - if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); - char first = mBases.charAt(0); - for (char c : mBases.toCharArray()) { - if (c != first) return true; - } - return false; - } - - /** - * get the genotype's location - * - * @return a GenomeLoc representing the location - */ - public GenomeLoc getLocation() { - return mLoc; - } - - /** - * returns true if the genotype is a point genotype, false if it's a indel / deletion - * - * @return true is a SNP - */ - @Override - public boolean isPointGenotype() { - return true; - } - - /** - * given the reference, are we a variant? (non-ref) - * - * @param ref the reference base - * - * @return true if we're a variant - */ - @Override - public boolean isVariant(char ref) { - String ret = Utils.dupString(ref, this.getPloidy()); - return !this.getBases().equals(ret); - } - - /** - * return this genotype as a variant - * - * @return - */ - @Override - public Variant toVariant() { - return null; - } - - /** - * set the confidence score - * @param confidenceScore - */ - public void setConfidenceScore(ConfidenceScore confidenceScore) { - this.mConfidenceScore = confidenceScore; - } - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index 4847d34d7..0d4e64aa3 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; -import org.broadinstitute.sting.utils.genotype.Variant; /** * @author aaron @@ -13,11 +11,11 @@ import org.broadinstitute.sting.utils.genotype.Variant; */ public interface Genotype { /** - * get the confidence score + * get the -1 * (log 10 of the error value) * - * @return get the confidence score that we're based on + * @return the log based error estimate */ - public ConfidenceScore getConfidenceScore(); + public double getLog10PError(); /** * get the bases that represent this @@ -71,16 +69,16 @@ public interface Genotype { public boolean isVariant(char ref); /** - * return this genotype as a variant - * - * @return + * get the reference base. + * @return a character, representing the reference base */ - public Variant toVariant(); + public char getReference(); /** - * return a readable string representation of this genotype + * return this genotype as a variant * - * @return + * @return the variant */ - public String toString(); + public Variant toVariation(); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java deleted file mode 100644 index 298254540..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -/** - * @author aaron - *

- * Interface GenotypeOutputFormat - *

- * This interface is in adition to the GenotypeCall interface, - * but adds the required functionality that output (GenotypeWriter) interfaces - * need. It's fair game to return 0 or any value for these fields, but the methods - * are all used by various output writers - */ -public interface GenotypeOutput extends GenotypeCall { - - /** - * return the likelihoods as a double array, in lexographic order - * - * @return the likelihoods - */ - public double[] getLikelihoods(); - - /** - * get the depth of the reads at this location - * @return the depth - */ - public int getReadDepth(); - - /** - * get the rms mapping qualities - * @return - */ - public double getRmsMapping(); - - /** - * get the best to the next best genotype - * @return - */ - public double getBestNext(); - - /** - * get the best compaired to the reference score - * @return - */ - public double getBestRef(); -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 26aec06aa..20c8f12a8 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -38,7 +38,7 @@ public interface GenotypeWriter { * Add a genotype, given a genotype locus * @param locus the locus to add */ - public void addGenotypeCall(GenotypeOutput locus); + public void addGenotypeCall(Genotype locus); /** * add a no call to the genotype file, if supported. diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java new file mode 100644 index 000000000..b511a781b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java @@ -0,0 +1,19 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; + +/** + * @author aaron + * + * Interface LikelihoodsBacked + * + * indicate that this genotype was called, and contains the other genotypes with their + * associtated information. + */ +public interface GenotypesBacked { + /** + * get the likelihoods + * @return an array in lexigraphical order of the likelihoods + */ + public Genotype getGenotype(DiploidGenotype x); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java index 9747aadd7..55b1ae52e 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java @@ -60,11 +60,6 @@ public class LikelihoodObject { NEGITIVE_LOG, LOG, RAW; } - // our qhet and qstar values; wait, what? - // TODO: are these really needed here? We have them to support the tabular output format only. - //private double qhat; - //private double qstar; - // our liklihood storage type protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java new file mode 100644 index 000000000..81f9cd328 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author aaron + * Interface LikelihoodsBacked + * + * this interface indicates that the genotype is + * backed up by genotype likelihood information. + */ +public interface LikelihoodsBacked { + + /** + * get the likelihood information for this + * @return + */ + public double[] getLikelihoods(); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java new file mode 100644 index 000000000..84a3b6687 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java @@ -0,0 +1,26 @@ +package org.broadinstitute.sting.utils.genotype; + +import net.sf.samtools.SAMRecord; + +import java.util.List; + +/** + * @author aaron + * + * Interface ReadBacked + * + * This interface indicates that a genotype or variant is backed by reads + */ +public interface ReadBacked { + /** + * get the SAM records that back this genotype call + * @return a list of SAM records + */ + public List getReads(); + + /** + * get the count of reads + * @return the number of reads we're backed by + */ + public int getReadCount(); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java index 4a642b1b2..c79dff4ba 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; - /** * @author aaron *

@@ -23,13 +21,6 @@ public interface Variant { */ public double getNonRefAlleleFrequency(); - /** - * get the confidence score for this variance - * - * @return the confidence score - */ - public ConfidenceScore getConfidenceScore(); - /** @return the VARIANT_TYPE of the current variant */ public VARIANT_TYPE getType(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java deleted file mode 100644 index 4930224ad..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.confidence; - - -/** - * - * @author aaron - * - * Class LikelihoodConfidenceScore - * - * A descriptions should go here. Blame aaron if it's missing. - */ -public class BayesianConfidenceScore extends ConfidenceScore { - public BayesianConfidenceScore(double score) { - super(score); - } - - /** - * return the confidence method we're employing, UNKNOWN is an option - * - * @return the method of confidence we represent - */ - @Override - public SCORE_METHOD getConfidenceType() { - return SCORE_METHOD.LOD_SCORE; - } - - /** - * get the confidence score, normalized to the range of [0-1] - * - * @return a confidence score - */ - @Override - public double normalizedConfidenceScore() { - return Math.pow(10,this.getScore()); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java deleted file mode 100644 index aecf8f1e8..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.confidence; - -/** - * @author aaron - *

- * Class ConfidenceScore - *

- * this class represents the confidence in a genotype, and the method we used to obtain it - */ -public abstract class ConfidenceScore implements Comparable { - - // the general error of a floating point value - private static final Double EPSILON = 1.0e-15; - - /** - * the method we use to generate this confidence - */ - public enum SCORE_METHOD { - LOD_SCORE, CHIP, UNKNOWN; - } - - private Double mScore; - - public ConfidenceScore(double score) { - this.mScore = score; - } - - /** - * compare this ConfidenceScore to another, throwing an exception if they're not the same scoring method - * @param o the other confidence score if - * @return 0 if equal - */ - @Override - public int compareTo(ConfidenceScore o) { - if (o.getConfidenceType() != this.getConfidenceType()) { - throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods"); - } - double diff = mScore - o.mScore; - if (Math.abs(diff) < (EPSILON * Math.abs(mScore))) - return 0; - else if (diff < 0) - return -1; - else - return 1; - } - - /** - * get the score - * @return a double representing the genotype score - */ - public Double getScore() { - return mScore; - } - - /** - * return the confidence method we're employing, UNKNOWN is an option - * @return the method of confidence we represent - */ - public abstract SCORE_METHOD getConfidenceType(); - - /** - * get the confidence score, normalized to the range of [0-1] - * @return a confidence score - */ - public abstract double normalizedConfidenceScore(); - - public String toString() { - return String.format("%.2f %s", getScore(), getConfidenceType().toString()); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java deleted file mode 100644 index 51f07cc23..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.confidence; - -import org.broadinstitute.sting.utils.genotype.Genotype; - -import java.util.Comparator; - - -/** - * - * @author aaron - * - * Class ConfidenceScoreComparator - * - * A descriptions should go here. Blame aaron if it's missing. - */ - -public class ConfidenceScoreComparator implements Comparator { - @Override - public int compare(Genotype genotype, Genotype genotype1) { - return genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore()); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 88aa40f47..0155c0308 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -3,14 +3,12 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.IndelLikelihood; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; +import java.util.Arrays; /* @@ -68,14 +66,11 @@ public class GeliAdapter implements GenotypeWriter { * @param contig the contig you're calling in * @param position the position on the contig * @param referenceBase the reference base - * @param readDepth the read depth at the specified position * @param likelihoods the likelihoods of each of the possible alleles */ public void addGenotypeCall(SAMSequenceRecord contig, int position, - float rmsMapQuals, char referenceBase, - int readDepth, LikelihoodObject likelihoods) { writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase)); } @@ -102,10 +97,27 @@ public class GeliAdapter implements GenotypeWriter { * @param locus the locus to add */ @Override - public void addGenotypeCall(GenotypeOutput locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; - LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj); + public void addGenotypeCall(Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, Double.MIN_VALUE); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + + } + char ref = locus.getReference(); + + + SSGenotypeCall call = (SSGenotypeCall)locus; + LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()), + (int)locus.getLocation().getStart(), + ref, + obj); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 79e7bbde1..6a516d253 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -1,23 +1,23 @@ package org.broadinstitute.sting.utils.genotype.geli; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; import java.io.FileNotFoundException; -import java.io.PrintWriter; import java.io.PrintStream; +import java.io.PrintWriter; +import java.util.Arrays; /** - * - * @author aaron - * - * Class GeliTextWriter - * - * A descriptions should go here. Blame aaron if it's missing. + * @author aaron + *

+ * Class GeliTextWriter + *

+ * write out the geli text file format containing genotype information */ public class GeliTextWriter implements GenotypeWriter { // where we write to @@ -25,6 +25,7 @@ public class GeliTextWriter implements GenotypeWriter { /** * create a geli text writer + * * @param file the file to write to */ public GeliTextWriter(File file) { @@ -48,28 +49,52 @@ public class GeliTextWriter implements GenotypeWriter { * * @param locus the locus to add */ - public void addGenotypeCall(GenotypeOutput locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; + public void addGenotypeCall(Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; - mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - locus.getLocation().getContig(), - locus.getLocation().getStart(), - locus.getReferencebase(), - call.getReadDepth(), - -1, - locus.getBases(), - call.getBestRef(), - call.getBestNext(), - call.getLikelihoods()[0], - call.getLikelihoods()[1], - call.getLikelihoods()[2], - call.getLikelihoods()[3], - call.getLikelihoods()[4], - call.getLikelihoods()[5], - call.getLikelihoods()[6], - call.getLikelihoods()[7], - call.getLikelihoods()[8], - call.getLikelihoods()[9])); + char ref = locus.getReference(); + + if (locus instanceof ReadBacked) { + readDepth = ((ReadBacked)locus).getReadCount(); + } + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, 0.0); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + double[] lks; + lks = Arrays.copyOf(likelihoods,likelihoods.length); + Arrays.sort(lks); + nextVrsBest = lks[9] - lks[8]; + if (ref != 'X') { + int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); + nextVrsRef = lks[9] - likelihoods[index]; + } + } + // we have to calcuate our own + + mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + locus.getLocation().getContig(), + locus.getLocation().getStart(), + ref, + readDepth, + -1, + locus.getBases(), + nextVrsRef, + nextVrsBest, + likelihoods[0], + likelihoods[1], + likelihoods[2], + likelihoods[3], + likelihoods[4], + likelihoods[5], + likelihoods[6], + likelihoods[7], + likelihoods[8], + likelihoods[9])); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index d1f40f9ab..893b54a27 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -1,17 +1,18 @@ package org.broadinstitute.sting.utils.genotype.glf; +import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.IndelLikelihood; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.genotype.*; import java.io.DataOutputStream; import java.io.File; +import java.util.Arrays; +import java.util.List; /* * Copyright (c) 2009 The Broad Institute * @@ -76,12 +77,12 @@ public class GLFWriter implements GenotypeWriter { /** * add a point genotype to the GLF writer * - * @param contig the name of the contig you're calling in - * @param refBase the reference base, as a char - * @param genomicLoc the location the location on the reference contig - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods + * @param contig the name of the contig you're calling in + * @param refBase the reference base, as a char + * @param genomicLoc the location the location on the reference contig + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods */ public void addGenotypeCall(SAMSequenceRecord contig, int genomicLoc, @@ -90,14 +91,14 @@ public class GLFWriter implements GenotypeWriter { int readDepth, LikelihoodObject lhValues) { - // check if we've jumped to a new contig + // check if we've jumped to a new contig checkSequence(contig.getSequenceName(), contig.getSequenceLength()); SinglePointCall call = new SinglePointCall(refBase, - genomicLoc - lastPos, - readDepth, - (short) rmsMapQ, - lhValues.toDoubleArray()); + genomicLoc - lastPos, + readDepth, + (short) rmsMapQ, + lhValues.toDoubleArray()); lastPos = genomicLoc; call.write(this.outputBinaryCodec); } @@ -105,20 +106,50 @@ public class GLFWriter implements GenotypeWriter { /** * Add a genotype, given a genotype locus * - * @param locus + * @param locus the genotype called at a locus */ @Override - public void addGenotypeCall(GenotypeOutput locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; - LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + public void addGenotypeCall(Genotype locus) { + char ref = locus.getReference(); + + // get likelihood information if available + LikelihoodObject obj; + if (locus instanceof GenotypesBacked) { + obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + } else { + double values[] = new double[10]; + Arrays.fill(values,-255.0); + obj = new LikelihoodObject(values, LikelihoodObject.LIKELIHOOD_TYPE.LOG); + } obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj); + + double rms = -1; + // calculate the RMS mapping qualities and the read depth + if (locus instanceof ReadBacked) { + rms = calculateRMS(((ReadBacked)locus).getReads()); + } + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,((ReadBacked)locus).getReadCount(),obj); } + + /** + * calculate the rms , given the read pileup + * + * @param reads the read array + * + * @return the rms of the read mapping qualities + */ + private double calculateRMS(List reads) { + int[] qualities = new int[reads.size()]; + for (int i = 0; i < reads.size(); i++) { + qualities[i] = reads.get(i).getMappingQuality(); + } + return MathUtils.rms(qualities); + } /** * add a variable length (indel, deletion, etc) to the genotype writer * - * @param contig the name of the contig you're calling in + * @param contig the name of the contig you're calling in * @param refBase the reference base * @param genomicLoc the location on the reference contig * @param readDepth the read depth at the specified postion @@ -138,19 +169,19 @@ public class GLFWriter implements GenotypeWriter { // check if we've jumped to a new contig checkSequence(contig.getSequenceName(), contig.getSequenceLength()); - + // normalize the two VariableLengthCall call = new VariableLengthCall(refBase, - genomicLoc - lastPos, - readDepth, - (short) rmsMapQ, - firstHomZyg.getLikelihood(), - secondHomZyg.getLikelihood(), - hetLikelihood, - firstHomZyg.getLengthOfIndel(), - firstHomZyg.getIndelSequence(), - secondHomZyg.getLengthOfIndel(), - secondHomZyg.getIndelSequence()); + genomicLoc - lastPos, + readDepth, + (short) rmsMapQ, + firstHomZyg.getLikelihood(), + secondHomZyg.getLikelihood(), + hetLikelihood, + firstHomZyg.getLengthOfIndel(), + firstHomZyg.getIndelSequence(), + secondHomZyg.getLengthOfIndel(), + secondHomZyg.getIndelSequence()); lastPos = genomicLoc; call.write(this.outputBinaryCodec); } @@ -158,8 +189,7 @@ public class GLFWriter implements GenotypeWriter { /** * add a no call to the genotype file, if supported. * - * @param position the position - * + * @param position the position */ @Override public void addNoCall(int position) { @@ -170,12 +200,12 @@ public class GLFWriter implements GenotypeWriter { /** * add a GLF record to the output file * - * @param contigName the contig name + * @param contigName the contig name * @param contigLength the contig length - * @param rec the GLF record to write. + * @param rec the GLF record to write. */ public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { - checkSequence(contigName,contigLength); + checkSequence(contigName, contigLength); rec.write(this.outputBinaryCodec); } @@ -200,12 +230,12 @@ public class GLFWriter implements GenotypeWriter { * check to see if we've jumped to a new contig * * @param sequenceName the name for the sequence - * @param seqLength the sequence length + * @param seqLength the sequence length */ private void checkSequence(String sequenceName, int seqLength) { if ((referenceSequenceName == null) || (!referenceSequenceName.equals(sequenceName))) { if (this.referenceSequenceName != null) { // don't write the record the first time - this.writeEndRecord(); + this.writeEndRecord(); } referenceSequenceName = sequenceName; referenceSequenceLength = seqLength; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index 2d9823e12..879e3f08b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.utils.genotype.tabular; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; +import java.util.Arrays; /** @@ -43,7 +43,23 @@ public class TabularLFWriter implements GenotypeWriter { * @param locus the locus to add */ @Override - public void addGenotypeCall(GenotypeOutput locus) { + public void addGenotypeCall(Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, Double.MIN_VALUE); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + + } + char ref = locus.getReference(); + + if (locus instanceof ReadBacked) { + readDepth = ((ReadBacked)locus).getReadCount(); + } /** * This output is not correct, but I don't we even use this format anymore. If we do, someone * should change this code @@ -51,14 +67,14 @@ public class TabularLFWriter implements GenotypeWriter { outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s", locus.getLocation().toString(), "NOT OUTPUTED", - locus.getReferencebase(), + ref, locus.getBases(), locus.getBases(), -1, -1, - locus.getBestRef(), - locus.getBestNext(), - locus.getReadDepth(), + nextVrsRef, + nextVrsBest, + readDepth, locus.getBases())); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index bf536d849..a5a0882f1 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -180,7 +180,7 @@ public class VCFReader implements Iterator, Iterable { // check to ensure that the column count of tokens is right if (tokens.length != mHeader.getColumnCount()) { - throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length); + throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line); } int index = 0; @@ -261,10 +261,14 @@ public class VCFReader implements Iterator, Iterable { * @param bases the list of bases for this genotype call */ private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { - if (Integer.valueOf(alleleNumber) == 0) + int alleleValue = Integer.valueOf(alleleNumber); + // check to make sure the allele value is within bounds + if (alleleValue < 0 || alleleValue > altAlleles.length) + throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list."); + if (alleleValue == 0) bases.add(String.valueOf(referenceBase)); else - bases.add(altAlleles[Integer.valueOf(alleleNumber) - 1]); + bases.add(altAlleles[alleleValue - 1]); }