From 3c2ae5585968fd751c4ea7c14708c303a690de09 Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 4 Sep 2009 05:31:15 +0000 Subject: [PATCH] changes for the genotype overhaul. Lots of changes focusing on the output side, from single sample genotyper to the output file formats like GLF and geli. Of note the genotype formats are still emitting posteriors as likelihoods; this is the way we've been doing it but it may change soon. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1529 348d0f76-0448-11de-a6fe-93d51630548a --- .../OldAndBustedGenotypeLikelihoods.java | 359 ------------------ .../walkers/genotyper/SSGGenotypeCall.java | 302 --------------- .../walkers/genotyper/SSGenotypeCall.java | 279 ++++++++++++++ .../genotyper/SingleSampleGenotyper.java | 28 +- .../gatk/walkers/CoverageEvalWalker.java | 78 ++-- .../utils/ArtificialPoolContext.java | 26 +- .../sting/utils/genotype/BasicGenotype.java | 169 --------- .../sting/utils/genotype/Genotype.java | 22 +- .../sting/utils/genotype/GenotypeOutput.java | 45 --- .../sting/utils/genotype/GenotypeWriter.java | 2 +- .../sting/utils/genotype/GenotypesBacked.java | 19 + .../utils/genotype/LikelihoodObject.java | 5 - .../utils/genotype/LikelihoodsBacked.java | 17 + .../sting/utils/genotype/ReadBacked.java | 26 ++ .../sting/utils/genotype/Variant.java | 9 - .../confidence/BayesianConfidenceScore.java | 36 -- .../genotype/confidence/ConfidenceScore.java | 70 ---- .../confidence/ConfidenceScoreComparator.java | 22 -- .../utils/genotype/geli/GeliAdapter.java | 36 +- .../utils/genotype/geli/GeliTextWriter.java | 87 +++-- .../sting/utils/genotype/glf/GLFWriter.java | 110 ++++-- .../genotype/tabular/TabularLFWriter.java | 30 +- .../sting/utils/genotype/vcf/VCFReader.java | 10 +- 23 files changed, 604 insertions(+), 1183 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java create mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java deleted file mode 100755 index 8afba67de..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/OldAndBustedGenotypeLikelihoods.java +++ /dev/null @@ -1,359 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.BasicGenotype; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; - -import static java.lang.Math.log10; -import static java.lang.Math.pow; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -public class OldAndBustedGenotypeLikelihoods { - protected static final double[] oneMinusData = new double[Byte.MAX_VALUE]; - protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE]; - protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE]; - //protected static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE]; - protected static final double log10Of1_3 = log10(1.0 / 3.0); - private boolean filterQ0Bases = true; - - static { - for (int qual = 0; qual < Byte.MAX_VALUE; qual++) { - double e = pow(10, (qual / -10.0)); - oneMinusData[qual] = log10(1.0 - e); - oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0); - oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0); - } - } - - private static double getOneMinusQual(final byte qual) { - return oneMinusData[qual]; - } - - private double getOneHalfMinusQual(final byte qual) { - return oneHalfMinusData[qual]; - } - - //public double[] likelihoods; - public int coverage; - - // The genotype priors; - private double priorHomRef; - private double priorHet; - private double priorHomVar; - private double[] oneHalfMinusData; - public double[] likelihoods; - - public boolean isThreeStateErrors() { - return threeStateErrors; - } - - public void setThreeStateErrors(boolean threeStateErrors) { - this.threeStateErrors = threeStateErrors; - this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne; - } - - private boolean threeStateErrors = false; - private boolean discoveryMode = false; - - public static double[] computePriors(double h) { - double[] pdbls = new double[3]; - pdbls[0] = 1.0 - (3.0 * h / 2.0); - pdbls[1] = h; - pdbls[2] = h / 2.0; - return pdbls; - } - - public final static String[] genotypes = new String[10]; - static { - genotypes[0] = "AA"; - genotypes[1] = "AC"; - genotypes[2] = "AG"; - genotypes[3] = "AT"; - genotypes[4] = "CC"; - genotypes[5] = "CG"; - genotypes[6] = "CT"; - genotypes[7] = "GG"; - genotypes[8] = "GT"; - genotypes[9] = "TT"; - } - - /** - * set the mode to discovery - * @param isInDiscoveryMode - */ - public void setDiscovery(boolean isInDiscoveryMode) { - discoveryMode = isInDiscoveryMode; - } - // Store the 2nd-best base priors for on-genotype primary bases - private HashMap onNextBestBasePriors = new HashMap(); - - // Store the 2nd-best base priors for off-genotype primary bases - private HashMap offNextBestBasePriors = new HashMap(); - - public OldAndBustedGenotypeLikelihoods(double heterozygosity) { - double[] vals = computePriors(heterozygosity); - initialize(vals[0], vals[1], vals[2]); - } - - public OldAndBustedGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) { - initialize(priorHomRef, priorHet, priorHomVar); - } - - /** - * Are we ignoring Q0 bases during calculations? - * @return - */ - public boolean isFilteringQ0Bases() { - return filterQ0Bases; - } - - /** - * Enable / disable filtering of Q0 bases. Enabled by default - * - * @param filterQ0Bases - */ - public void filterQ0Bases(boolean filterQ0Bases) { - this.filterQ0Bases = filterQ0Bases; - } - - private void initialize(double priorHomRef, double priorHet, double priorHomVar) { - this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne; - - this.priorHomRef = priorHomRef; - this.priorHet = priorHet; - this.priorHomVar = priorHomVar; - - likelihoods = new double[10]; - - coverage = 0; - - for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); } - } - - public double getHomRefPrior() { - return priorHomRef; - } - - public void setHomRefPrior(double priorHomRef) { - this.priorHomRef = priorHomRef; - } - - public double getHetPrior() { - return priorHet; - } - - public void setHetPrior(double priorHet) { - this.priorHet = priorHet; - } - - public double getHomVarPrior() { - return priorHomVar; - } - - public void setHomVarPrior(double priorHomVar) { - this.priorHomVar = priorHomVar; - } - - public int add(char ref, char read, byte qual) - { - if (qual <= 0) { - if ( isFilteringQ0Bases() ) { - return 0; - } else { - qual = 1; - } - } - - if (coverage == 0) - { - for (int i = 0; i < likelihoods.length; i++) - { - likelihoods[i] = 0; - } - } - //System.out.printf("%s %s%n", OldAndBustedGenotypeLikelihoods.class, this.toString('A')); - - for (int i = 0; i < genotypes.length; i++) - { - double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual); - System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]); - likelihoods[i] += likelihood; - coverage += 1; - } - - //System.out.printf("%s %s%n", OldAndBustedGenotypeLikelihoods.class, this.toString('A')); - - return 1; - } - - private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) { - if (qual == 0) { - // zero quals are wrong - throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual)); - } - - char h1 = genotype.charAt(0); - char h2 = genotype.charAt(1); - - double p_base; - - if ((h1 == h2) && (h1 == read)) { - // hom - p_base = getOneMinusQual(qual); - } else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) { - // het - p_base = getOneHalfMinusQual(qual); - } else if ( threeStateErrors ) { - // error - //System.out.printf("%s %c %c %b %f %f%n", genotype, h1, h2, h1 != h2, log10Of2_3, log10Of1_3 ); - p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 ); - } else { - // error - p_base = qual / -10.0; - } - - return p_base; - } - - public String[] sorted_genotypes; - public double[] sorted_likelihoods; - - public void sort() { - Integer[] permutation = Utils.SortPermutation(likelihoods); - - Integer[] reverse_permutation = new Integer[permutation.length]; - for (int i = 0; i < reverse_permutation.length; i++) { - reverse_permutation[i] = permutation[(permutation.length - 1) - i]; - } - - sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation); - sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation); - } - - public String toString(char ref) { - this.sort(); - double sum = 0; - String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref)); - for (int i = 0; i < sorted_genotypes.length; i++) { - if (i != 0) { - s = s + " "; - } - s = s + sorted_genotypes[i] + ":" + String.format("%.10f", sorted_likelihoods[i]); - sum += Math.pow(10,sorted_likelihoods[i]); - } - s = s + String.format(" %f", sum); - return s; - } - - public void applyPrior(char ref, double[] allele_likelihoods) { - int k = 0; - for (int i = 0; i < 4; i++) { - for (int j = i; j < 4; j++) { - if (i == j) { - this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]); - } else { - this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2); - } - k++; - } - } - this.sort(); - } - - public void applyPrior(char ref) { - for (int i = 0; i < genotypes.length; i++) { - if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { - // hom-ref - likelihoods[i] += Math.log10(priorHomRef); - } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) { - // hom-nonref - likelihoods[i] += Math.log10(priorHomVar); - } else { - // het - likelihoods[i] += Math.log10(priorHet); - } - if (Double.isInfinite(likelihoods[i])) { - likelihoods[i] = -1000; - } - } - this.sort(); - } - - public double LodVsNextBest() { - this.sort(); - return sorted_likelihoods[0] - sorted_likelihoods[1]; - } - - double ref_likelihood = Double.NaN; - - public double LodVsRef(char ref) { - if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) { - ref_likelihood = sorted_likelihoods[0]; - return (-1.0 * this.LodVsNextBest()); - } else { - for (int i = 0; i < genotypes.length; i++) { - if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) { - ref_likelihood = likelihoods[i]; - } - } - } - return sorted_likelihoods[0] - ref_likelihood; - } - - public String BestGenotype() { - this.sort(); - return this.sorted_genotypes[0]; - } - - public double BestPosterior() { - this.sort(); - return this.sorted_likelihoods[0]; - } - - public double RefPosterior(char ref) { - this.LodVsRef(ref); - return this.ref_likelihood; - } - - /** - * given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs - * - * @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information - * @param ref the reference base - * @param pileup a pileup of the reads, containing the reads and their offsets - * - * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values - */ - public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) { - //filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag - - // for calculating the rms of the mapping qualities - double squared = 0.0; - for (int i = 0; i < pileup.getReads().size(); i++) { - SAMRecord read = pileup.getReads().get(i); - squared += read.getMappingQuality() * read.getMappingQuality(); - int offset = pileup.getOffsets().get(i); - char base = read.getReadString().charAt(offset); - byte qual = read.getBaseQualities()[offset]; - add(ref, base, qual); - } - // save off the likelihoods - if (likelihoods == null || likelihoods.length == 0) return null; - - // Apply the two calculations - applyPrior(ref); - - // lets setup the locus - List lst = new ArrayList(); - for (int x = 0; x < this.likelihoods.length; x++) { - lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x]))); - } - return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java deleted file mode 100644 index c71c66385..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGGenotypeCall.java +++ /dev/null @@ -1,302 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; -import org.broadinstitute.sting.utils.genotype.*; - -import java.util.*; - - -/** - * @author aaron - *

- * Class GenotypeCallImpl - *

- * The single sample genotypers implementation of the genotype call, which contains - * extra information for the various genotype outputs - */ -public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput { - // our stored genotype locus - private final String mRefBase; - private final int mPloidy; - private TreeMap mGenotypes = new TreeMap(); - private double mLikelihoods[]; - private double bestNext = 0; - private double bestRef = 0; - - private int readDepth = 0; - private double rmsMapping = 0; - - /** - * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object - * - * @param discoveryMode are we in discovery mode or genotyping mode - * @param mRefBase the ref base - * @param mPloidy the ploidy - * @param genotypes the genotype - * @param likelihoods the likelihood - * @param pileup the pile-up of reads at the specified locus - */ - public SSGGenotypeCall(boolean discoveryMode, char mRefBase, int mPloidy, List genotypes, double likelihoods[], ReadBackedPileup pileup) { - this.mRefBase = String.valueOf(mRefBase).toUpperCase(); - this.mPloidy = mPloidy; - if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0"); - String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase(); - calculateBestNext(discoveryMode, genotypes, likelihoods, refStr); - - this.readDepth = pileup.getReads().size(); - rmsMapping = calculateRMS(pileup); - } - - - /** - * calculate the best and next best, and update the stored confidence scores based on these values - * - * @param discoveryMode are we in discovery mode? - * @param genotypes the genotypes - * @param likelihoods the likelihoods corresponding to the genotypes - * @param refStr the reference string, i.e. reb base[ploidy] - */ - private void calculateBestNext(boolean discoveryMode, List genotypes, double[] likelihoods, String refStr) { - int index = 0; - double ref = 0.0; - double best = Double.NEGATIVE_INFINITY; - double next = Double.NEGATIVE_INFINITY; - for (Genotype g : genotypes) { - if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index]; - if (likelihoods[index] > best) { - next = best; - best = likelihoods[index]; - } else if (likelihoods[index] > next) next = likelihoods[index]; - index++; - } - bestNext = Math.abs(best - next); - bestRef = Math.abs(best - ref); - mLikelihoods = likelihoods; - index = 0; - - // reset the confidence based on either the discovery mode or the genotype mode - for (Genotype g : genotypes) { - double val = (discoveryMode) ? Math.abs(likelihoods[index] - ref) : Math.abs(likelihoods[index] - next); - ((BasicGenotype) g).setConfidenceScore(new BayesianConfidenceScore(val)); - mGenotypes.put(likelihoods[index], g); - index++; - } - } - - @Override - public boolean equals(Object other) { - if(other == null) - return false; - if(other instanceof SSGGenotypeCall) { - SSGGenotypeCall otherCall = (SSGGenotypeCall)other; - - boolean likelihoodsMatch = true; - for ( int i = 0; i < mLikelihoods.length; i++ ) { - likelihoodsMatch = likelihoodsMatch && MathUtils.compareDoubles(mLikelihoods[i], otherCall.mLikelihoods[i]) == 0; - } - //System.out.printf("likelihoodsMatch = %b%n", likelihoodsMatch); - - return this.mRefBase.equals(otherCall.mRefBase) && - this.mPloidy == otherCall.mPloidy && - MathUtils.compareDoubles(this.bestNext, otherCall.bestNext) == 0 && - MathUtils.compareDoubles(this.bestRef, otherCall.bestRef) == 0 && - this.readDepth == otherCall.readDepth && - MathUtils.compareDoubles(this.rmsMapping, otherCall.rmsMapping) == 0 && - likelihoodsMatch; - } - return false; - } - - public String toString() { - return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s", - getLocation(), mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods)); - } - - /** - * calculate the rms , given the read pileup - * - * @param pileup - * - * @return - */ - private double calculateRMS(ReadBackedPileup pileup) { - List reads = pileup.getReads(); - int[] qualities = new int[reads.size()]; - for (int i=0; i < reads.size(); i++) - qualities[i] = reads.get(i).getMappingQuality(); - return MathUtils.rms(qualities); - } - - /** - * gets the reference base - * - * @return the reference base we represent - */ - @Override - public char getReferencebase() { - return mRefBase.charAt(0); - } - - /** - * check to see if this called genotype is a variant, i.e. not homozygous reference - * - * @return true if it's not hom ref, false otherwise - */ - @Override - public boolean isVariation() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(getReferencebase()); - } - - /** - * get the confidence score - * - * @return get the confidence score that we're based on - */ - @Override - public ConfidenceScore getConfidenceScore() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).getConfidenceScore(); - } - - /** - * get the bases that represent this - * - * @return the bases, as a string - */ - @Override - public String getBases() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).getBases(); - } - - /** - * get the ploidy - * - * @return the ploidy value - */ - @Override - public int getPloidy() { - return this.mPloidy; - } - - /** - * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) - * - * @return true if we're homozygous, false otherwise - */ - @Override - public boolean isHom() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHom(); - } - - /** - * Returns true if observed alleles differ (regardless of whether they are ref or alt) - * - * @return true if we're het, false otherwise - */ - @Override - public boolean isHet() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHet(); - } - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - @Override - public GenomeLoc getLocation() { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).getLocation(); - } - - /** - * returns true if the genotype is a point genotype, false if it's a indel / deletion - * - * @return true is a SNP - */ - @Override - public boolean isPointGenotype() { - return true; - } - - /** - * given the reference, are we a variant? (non-ref) - * - * @param ref the reference base or bases - * - * @return true if we're a variant - */ - @Override - public boolean isVariant(char ref) { - return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(ref); - } - - /** - * return this genotype as a variant - * - * @return - */ - @Override - public Variant toVariant() { - return null; //To change body of implemented methods use File | Settings | File Templates. - } - - /** - * get the genotypes, sorted in asscending order by their ConfidenceScores (the best - * to the worst ConfidenceScores) - * - * @return a list of the genotypes, sorted by ConfidenceScores - */ - @Override - public List getGenotypes() { - List newList = new ArrayList(); - newList.addAll(this.mGenotypes.values()); - return newList; - } - - /** - * get the genotypes sorted lexigraphically - * - * @return a list of the genotypes sorted lexi - */ - @Override - public List getLexigraphicallySortedGenotypes() { - List newList = new ArrayList(); - newList.addAll(this.mGenotypes.values()); - Collections.sort(newList, new LexigraphicalComparator()); - return newList; - } - - /** - * return the likelihoods as a double array, in lexographic order - * - * @return the likelihoods - */ - public double[] getLikelihoods() { - return this.mLikelihoods; - } - - public int getReadDepth() { - return readDepth; - } - - public double getRmsMapping() { - return rmsMapping; - } - - public double getBestNext() { - return bestNext; - } - - public double getBestRef() { - return bestRef; - } -} - - - \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java new file mode 100644 index 000000000..8aa33f7d4 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java @@ -0,0 +1,279 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; + +import java.util.Arrays; +import java.util.List; + + +/** + * @author aaron + *

+ * Class SSGenotypeCall + *

+ * The single sample implementation of the genotype interface, which contains + * extra information for the various genotype outputs + */ +public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked { + private final char mRefBase; + private final GenotypeLikelihoods mGenotypeLikelihoods; + + // the next two values are filled in on-demand. Default to -1 since they can never be negitive + private final GenomeLoc mLocation; + private final ReadBackedPileup mPileup; + + // if this is null, we were constructed with the intention that we'd represent the best genotype + private DiploidGenotype mGenotype = null; + + // which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best + private DiploidGenotype mCompareTo = null; + + // are we best vrs ref or best vrs next - for internal consumption only + private final boolean mBestVrsRef; + + /** + * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object + * + * @param discovery are we representing the best vrs next or best vrs ref + * @param location the location we're working with + * @param refBase the ref base + * @param gtlh the genotype likelihoods object + * @param pileup the pile-up of reads at the specified locus + */ + public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { + mBestVrsRef = discovery; + mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case + mGenotypeLikelihoods = gtlh; + mLocation = location; + mPileup = pileup; + } + + /** + * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object + * + * @param discovery are we representing the best vrs next or best vrs ref + * @param location the location we're working with + * @param refBase the ref base + * @param gtlh the genotype likelihoods object + * @param pileup the pile-up of reads at the specified locus + */ + SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { + mBestVrsRef = discovery; + mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case + mGenotypeLikelihoods = gtlh; + mLocation = location; + mGenotype = genotype; + mPileup = pileup; + } + + @Override + public boolean equals(Object other) { + if (other == null) + return false; + if (other instanceof SSGenotypeCall) { + SSGenotypeCall otherCall = (SSGenotypeCall) other; + + if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) { + return false; + } + if (!this.mGenotype.equals(otherCall.mGenotype)) + return false; + return (this.mRefBase == otherCall.mRefBase) && + this.mPileup.equals(mPileup); + } + return false; + } + + public String toString() { + return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f likelihoods=%s", + getLocation(), mRefBase, mPileup.getReads().size(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods())); + } + + + + + /** + * get the confidence we have + * + * @return get the one minus error value + */ + @Override + public double getLog10PError() { + getBestGenotype(); + getAltGenotype(); + return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo)); + } + + /** + * get the best genotype + */ + public DiploidGenotype getBestGenotype() { + if (mGenotype == null) { + Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); + mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + } + return mGenotype; + } + + /** + * get the alternate genotype + */ + public DiploidGenotype getAltGenotype() { + if (mCompareTo == null) { + if (this.mBestVrsRef) { + mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2)); + } else { + Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); + mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + } + } + return mCompareTo; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + @Override + public String getBases() { + return getBestGenotype().toString(); + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + @Override + public int getPloidy() { + return 2; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + @Override + public boolean isHom() { + return getBestGenotype().isHom(); + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + @Override + public boolean isHet() { + return !isHom(); + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + @Override + public GenomeLoc getLocation() { + return this.mLocation; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + @Override + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * + * @return true if we're a variant + */ + @Override + public boolean isVariant(char ref) { + return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString()); + } + + /** + * return this genotype as a variant + * + * @return + */ + public Variant toVariation() { + return null; // the next step is to implement the variant system + } + + /** + * return the likelihoods as a double array, in lexographic order + * + * @return the likelihoods + */ + public double[] getProbabilities() { + return this.mGenotypeLikelihoods.getPosteriors(); + } + + /** + * get the SAM records that back this genotype call + * + * @return a list of SAM records + */ + public List getReads() { + return this.mPileup.getReads(); + } + + /** + * get the count of reads + * + * @return the number of reads we're backed by + */ + public int getReadCount() { + return this.mPileup.getReads().size(); + } + + /** + * get the reference character + * + * @return + */ + public char getReference() { + return this.mRefBase; + } + + /** + * get the likelihoods + * + * @return an array in lexigraphical order of the likelihoods + */ + public Genotype getGenotype(DiploidGenotype x) { + return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x); + } + + /** + * get the likelihood information for this + * + * @return + */ + public double[] getLikelihoods() { + // TODO: this is wrong, obviously, but we've kept it for now to stay backward compatible with previous calls + return this.mGenotypeLikelihoods.getPosteriors(); + } + + +} + + + \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java index 0d42670a1..dd95c92a4 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java @@ -5,23 +5,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.BasicGenotype; -import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore; import java.io.File; -import java.util.List; -import java.util.ArrayList; @ReadFilters(ZeroMappingQualityReadFilter.class) -public class SingleSampleGenotyper extends LocusWalker { +public class SingleSampleGenotyper extends LocusWalker { // Control output settings @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false) public File VARIANTS_FILE = null; @@ -74,7 +69,7 @@ public class SingleSampleGenotyper extends LocusWalker lst = new ArrayList(); - for ( DiploidGenotype g : DiploidGenotype.values() ) { - lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(gl.getLikelihood(g)))); - } - - //System.out.printf("At %s%n", new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, g.getPosteriors(), pileup)); - return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, gl.getPosteriors(), pileup); + return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup); } /** @@ -138,11 +126,9 @@ public class SingleSampleGenotyper extends LocusWalker LOD_THRESHOLD) { - //System.out.printf("Call %s%n", call); + public GenotypeWriter reduce(SSGenotypeCall call, GenotypeWriter sum) { + if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) { + if (call.getLog10PError() >= LOD_THRESHOLD) { sum.addGenotypeCall(call); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 58e73d513..046057a84 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -6,14 +6,17 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodGFF; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.ListUtils; -import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.*; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -77,9 +80,9 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - GenotypeCall call = SSG.map(tracker, ref, subContext); + SSGenotypeCall call = SSG.map(tracker, ref, subContext); - String callType = (call.isVariation()) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; + String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; if (call != null) { GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } @@ -104,27 +107,52 @@ public class CoverageEvalWalker extends LocusWalker, String> { } // a method to support getting the geli string, since the AlleleFrequencyEstimate is going away - public String toGeliString (GenotypeCall locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; - return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - call.getLocation().getContig(), - call.getLocation().getStart(), - call.getReferencebase(), - call.getReadDepth(), - -1, - call.getBases(), - call.getBestRef(), - call.getBestNext(), - call.getLikelihoods()[0], - call.getLikelihoods()[1], - call.getLikelihoods()[2], - call.getLikelihoods()[3], - call.getLikelihoods()[4], - call.getLikelihoods()[5], - call.getLikelihoods()[6], - call.getLikelihoods()[7], - call.getLikelihoods()[8], - call.getLikelihoods()[9]); + public String toGeliString (Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; + + char ref = locus.getReference(); + + if (locus instanceof ReadBacked) { + readDepth = ((ReadBacked)locus).getReadCount(); + } + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, 0.0); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + double[] lks; + lks = Arrays.copyOf(likelihoods,likelihoods.length); + Arrays.sort(lks); + nextVrsBest = lks[9] - lks[8]; + if (ref != 'X') { + int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); + nextVrsRef = lks[9] - likelihoods[index]; + } + } + // we have to calcuate our own + + return new String(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + locus.getLocation().getContig(), + locus.getLocation().getStart(), + ref, + readDepth, + -1, + locus.getBases(), + nextVrsRef, + nextVrsBest, + likelihoods[0], + likelihoods[1], + likelihoods[2], + likelihoods[3], + likelihoods[4], + likelihoods[5], + likelihoods[6], + likelihoods[7], + likelihoods[8], + likelihoods[9])); } } diff --git a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java index bca475a1d..13b8854d7 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java +++ b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java @@ -1,21 +1,19 @@ package org.broadinstitute.sting.playground.utils; -import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import net.sf.samtools.SAMFileWriter; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.GenotypeCall; +import org.broadinstitute.sting.utils.genotype.Genotype; import java.io.PrintWriter; -import java.util.Set; -import java.util.List; -import java.util.LinkedList; import java.util.ArrayList; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMFileWriter; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; /** * Created by IntelliJ IDEA. @@ -252,11 +250,11 @@ public class ArtificialPoolContext { } public String genotypeAndConfidenceToString(int group, String spacer) { - GenotypeCall call = this.getGenotypeCall(group); - return (call.getGenotypes() + spacer + call.getConfidenceScore().toString()); + Genotype call = this.getGenotype(group); + return (call.getBases() + spacer + call.getLog10PError()); // TODO: fix me } - public GenotypeCall getGenotypeCall(int group) { + public Genotype getGenotype(int group) { AlignmentContext alicon = this.getAlignmentContext(); Pair[],List[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets()); return ssg.map(this.getRefMetaDataTracker(),this.getReferenceContext(), diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java deleted file mode 100644 index 149ad8843..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ /dev/null @@ -1,169 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; - - -/** - * @author aaron - *

- * Class BasicGenotype - *

- * A basic implementation of the genotype interface - */ -public class BasicGenotype implements Genotype { - - // the bases that represent this genotype - private String mBases = ""; - - // the ploidy, assume 2 unless told otherwise - private int mPloidy = 2; - - // the confidence score - protected ConfidenceScore mConfidenceScore; - - // our location - private GenomeLoc mLoc; - - /** - * construct a genotypeLikelihood, given the bases, the confidence score, and the ploidy - * - * @param loc the location of the genotype - * @param bases the bases that make up this genotype - * @param ploidy the ploidy of this genotype - * @param score the confidence score - */ - public BasicGenotype(GenomeLoc loc, String bases, int ploidy, ConfidenceScore score) { - this.mPloidy = ploidy; - if (bases.length() != ploidy) { - throw new IllegalArgumentException("The number of bases should match the ploidy"); - } - this.mBases = bases; - this.mConfidenceScore = score; - this.mLoc = loc; - } - - /** - * construct a genotypeLikelihood, given the bases and the confidence score, and assume the - * ploidy is 2. - * - * @param loc the location of the genotype - * @param bases the bases that make up this genotype - * @param score the confidence score - */ - public BasicGenotype(GenomeLoc loc, String bases, ConfidenceScore score) { - if (bases.length() != mPloidy) { - throw new IllegalArgumentException("The number of bases should match the ploidy"); - } - this.mBases = bases; - this.mConfidenceScore = score; - this.mLoc = loc; - } - - /** - * get the confidence score - * - * @return get the confidence score that we're based on - */ - public ConfidenceScore getConfidenceScore() { - return this.mConfidenceScore; - } - - /** - * get the bases that represent this - * - * @return the bases, as a string - */ - public String getBases() { - return mBases; - } - - /** - * get the ploidy - * - * @return the ploidy value - */ - public int getPloidy() { - return mPloidy; - } - - /** - * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) - * - * @return true if we're homozygous, false otherwise - */ - public boolean isHom() { - if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); - char first = mBases.charAt(0); - for (char c : mBases.toCharArray()) { - if (c != first) return false; - } - return true; - } - - /** - * Returns true if observed alleles differ (regardless of whether they are ref or alt) - * - * @return true if we're het, false otherwise - */ - public boolean isHet() { - if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base"); - char first = mBases.charAt(0); - for (char c : mBases.toCharArray()) { - if (c != first) return true; - } - return false; - } - - /** - * get the genotype's location - * - * @return a GenomeLoc representing the location - */ - public GenomeLoc getLocation() { - return mLoc; - } - - /** - * returns true if the genotype is a point genotype, false if it's a indel / deletion - * - * @return true is a SNP - */ - @Override - public boolean isPointGenotype() { - return true; - } - - /** - * given the reference, are we a variant? (non-ref) - * - * @param ref the reference base - * - * @return true if we're a variant - */ - @Override - public boolean isVariant(char ref) { - String ret = Utils.dupString(ref, this.getPloidy()); - return !this.getBases().equals(ret); - } - - /** - * return this genotype as a variant - * - * @return - */ - @Override - public Variant toVariant() { - return null; - } - - /** - * set the confidence score - * @param confidenceScore - */ - public void setConfidenceScore(ConfidenceScore confidenceScore) { - this.mConfidenceScore = confidenceScore; - } - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index 4847d34d7..0d4e64aa3 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; -import org.broadinstitute.sting.utils.genotype.Variant; /** * @author aaron @@ -13,11 +11,11 @@ import org.broadinstitute.sting.utils.genotype.Variant; */ public interface Genotype { /** - * get the confidence score + * get the -1 * (log 10 of the error value) * - * @return get the confidence score that we're based on + * @return the log based error estimate */ - public ConfidenceScore getConfidenceScore(); + public double getLog10PError(); /** * get the bases that represent this @@ -71,16 +69,16 @@ public interface Genotype { public boolean isVariant(char ref); /** - * return this genotype as a variant - * - * @return + * get the reference base. + * @return a character, representing the reference base */ - public Variant toVariant(); + public char getReference(); /** - * return a readable string representation of this genotype + * return this genotype as a variant * - * @return + * @return the variant */ - public String toString(); + public Variant toVariation(); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java deleted file mode 100644 index 298254540..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeOutput.java +++ /dev/null @@ -1,45 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -/** - * @author aaron - *

- * Interface GenotypeOutputFormat - *

- * This interface is in adition to the GenotypeCall interface, - * but adds the required functionality that output (GenotypeWriter) interfaces - * need. It's fair game to return 0 or any value for these fields, but the methods - * are all used by various output writers - */ -public interface GenotypeOutput extends GenotypeCall { - - /** - * return the likelihoods as a double array, in lexographic order - * - * @return the likelihoods - */ - public double[] getLikelihoods(); - - /** - * get the depth of the reads at this location - * @return the depth - */ - public int getReadDepth(); - - /** - * get the rms mapping qualities - * @return - */ - public double getRmsMapping(); - - /** - * get the best to the next best genotype - * @return - */ - public double getBestNext(); - - /** - * get the best compaired to the reference score - * @return - */ - public double getBestRef(); -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 26aec06aa..20c8f12a8 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -38,7 +38,7 @@ public interface GenotypeWriter { * Add a genotype, given a genotype locus * @param locus the locus to add */ - public void addGenotypeCall(GenotypeOutput locus); + public void addGenotypeCall(Genotype locus); /** * add a no call to the genotype file, if supported. diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java new file mode 100644 index 000000000..b511a781b --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypesBacked.java @@ -0,0 +1,19 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; + +/** + * @author aaron + * + * Interface LikelihoodsBacked + * + * indicate that this genotype was called, and contains the other genotypes with their + * associtated information. + */ +public interface GenotypesBacked { + /** + * get the likelihoods + * @return an array in lexigraphical order of the likelihoods + */ + public Genotype getGenotype(DiploidGenotype x); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java index 9747aadd7..55b1ae52e 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodObject.java @@ -60,11 +60,6 @@ public class LikelihoodObject { NEGITIVE_LOG, LOG, RAW; } - // our qhet and qstar values; wait, what? - // TODO: are these really needed here? We have them to support the tabular output format only. - //private double qhat; - //private double qstar; - // our liklihood storage type protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java new file mode 100644 index 000000000..81f9cd328 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java @@ -0,0 +1,17 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author aaron + * Interface LikelihoodsBacked + * + * this interface indicates that the genotype is + * backed up by genotype likelihood information. + */ +public interface LikelihoodsBacked { + + /** + * get the likelihood information for this + * @return + */ + public double[] getLikelihoods(); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java new file mode 100644 index 000000000..84a3b6687 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java @@ -0,0 +1,26 @@ +package org.broadinstitute.sting.utils.genotype; + +import net.sf.samtools.SAMRecord; + +import java.util.List; + +/** + * @author aaron + * + * Interface ReadBacked + * + * This interface indicates that a genotype or variant is backed by reads + */ +public interface ReadBacked { + /** + * get the SAM records that back this genotype call + * @return a list of SAM records + */ + public List getReads(); + + /** + * get the count of reads + * @return the number of reads we're backed by + */ + public int getReadCount(); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java index 4a642b1b2..c79dff4ba 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Variant.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Variant.java @@ -1,8 +1,6 @@ package org.broadinstitute.sting.utils.genotype; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore; - /** * @author aaron *

@@ -23,13 +21,6 @@ public interface Variant { */ public double getNonRefAlleleFrequency(); - /** - * get the confidence score for this variance - * - * @return the confidence score - */ - public ConfidenceScore getConfidenceScore(); - /** @return the VARIANT_TYPE of the current variant */ public VARIANT_TYPE getType(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java deleted file mode 100644 index 4930224ad..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/confidence/BayesianConfidenceScore.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.confidence; - - -/** - * - * @author aaron - * - * Class LikelihoodConfidenceScore - * - * A descriptions should go here. Blame aaron if it's missing. - */ -public class BayesianConfidenceScore extends ConfidenceScore { - public BayesianConfidenceScore(double score) { - super(score); - } - - /** - * return the confidence method we're employing, UNKNOWN is an option - * - * @return the method of confidence we represent - */ - @Override - public SCORE_METHOD getConfidenceType() { - return SCORE_METHOD.LOD_SCORE; - } - - /** - * get the confidence score, normalized to the range of [0-1] - * - * @return a confidence score - */ - @Override - public double normalizedConfidenceScore() { - return Math.pow(10,this.getScore()); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java deleted file mode 100644 index aecf8f1e8..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScore.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.confidence; - -/** - * @author aaron - *

- * Class ConfidenceScore - *

- * this class represents the confidence in a genotype, and the method we used to obtain it - */ -public abstract class ConfidenceScore implements Comparable { - - // the general error of a floating point value - private static final Double EPSILON = 1.0e-15; - - /** - * the method we use to generate this confidence - */ - public enum SCORE_METHOD { - LOD_SCORE, CHIP, UNKNOWN; - } - - private Double mScore; - - public ConfidenceScore(double score) { - this.mScore = score; - } - - /** - * compare this ConfidenceScore to another, throwing an exception if they're not the same scoring method - * @param o the other confidence score if - * @return 0 if equal - */ - @Override - public int compareTo(ConfidenceScore o) { - if (o.getConfidenceType() != this.getConfidenceType()) { - throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods"); - } - double diff = mScore - o.mScore; - if (Math.abs(diff) < (EPSILON * Math.abs(mScore))) - return 0; - else if (diff < 0) - return -1; - else - return 1; - } - - /** - * get the score - * @return a double representing the genotype score - */ - public Double getScore() { - return mScore; - } - - /** - * return the confidence method we're employing, UNKNOWN is an option - * @return the method of confidence we represent - */ - public abstract SCORE_METHOD getConfidenceType(); - - /** - * get the confidence score, normalized to the range of [0-1] - * @return a confidence score - */ - public abstract double normalizedConfidenceScore(); - - public String toString() { - return String.format("%.2f %s", getScore(), getConfidenceType().toString()); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java b/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java deleted file mode 100644 index 51f07cc23..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/confidence/ConfidenceScoreComparator.java +++ /dev/null @@ -1,22 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.confidence; - -import org.broadinstitute.sting.utils.genotype.Genotype; - -import java.util.Comparator; - - -/** - * - * @author aaron - * - * Class ConfidenceScoreComparator - * - * A descriptions should go here. Blame aaron if it's missing. - */ - -public class ConfidenceScoreComparator implements Comparator { - @Override - public int compare(Genotype genotype, Genotype genotype1) { - return genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore()); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 88aa40f47..0155c0308 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -3,14 +3,12 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.IndelLikelihood; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; +import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; +import java.util.Arrays; /* @@ -68,14 +66,11 @@ public class GeliAdapter implements GenotypeWriter { * @param contig the contig you're calling in * @param position the position on the contig * @param referenceBase the reference base - * @param readDepth the read depth at the specified position * @param likelihoods the likelihoods of each of the possible alleles */ public void addGenotypeCall(SAMSequenceRecord contig, int position, - float rmsMapQuals, char referenceBase, - int readDepth, LikelihoodObject likelihoods) { writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase)); } @@ -102,10 +97,27 @@ public class GeliAdapter implements GenotypeWriter { * @param locus the locus to add */ @Override - public void addGenotypeCall(GenotypeOutput locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; - LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj); + public void addGenotypeCall(Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, Double.MIN_VALUE); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + + } + char ref = locus.getReference(); + + + SSGenotypeCall call = (SSGenotypeCall)locus; + LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()), + (int)locus.getLocation().getStart(), + ref, + obj); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 79e7bbde1..6a516d253 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -1,23 +1,23 @@ package org.broadinstitute.sting.utils.genotype.geli; +import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; import java.io.FileNotFoundException; -import java.io.PrintWriter; import java.io.PrintStream; +import java.io.PrintWriter; +import java.util.Arrays; /** - * - * @author aaron - * - * Class GeliTextWriter - * - * A descriptions should go here. Blame aaron if it's missing. + * @author aaron + *

+ * Class GeliTextWriter + *

+ * write out the geli text file format containing genotype information */ public class GeliTextWriter implements GenotypeWriter { // where we write to @@ -25,6 +25,7 @@ public class GeliTextWriter implements GenotypeWriter { /** * create a geli text writer + * * @param file the file to write to */ public GeliTextWriter(File file) { @@ -48,28 +49,52 @@ public class GeliTextWriter implements GenotypeWriter { * * @param locus the locus to add */ - public void addGenotypeCall(GenotypeOutput locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; + public void addGenotypeCall(Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; - mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - locus.getLocation().getContig(), - locus.getLocation().getStart(), - locus.getReferencebase(), - call.getReadDepth(), - -1, - locus.getBases(), - call.getBestRef(), - call.getBestNext(), - call.getLikelihoods()[0], - call.getLikelihoods()[1], - call.getLikelihoods()[2], - call.getLikelihoods()[3], - call.getLikelihoods()[4], - call.getLikelihoods()[5], - call.getLikelihoods()[6], - call.getLikelihoods()[7], - call.getLikelihoods()[8], - call.getLikelihoods()[9])); + char ref = locus.getReference(); + + if (locus instanceof ReadBacked) { + readDepth = ((ReadBacked)locus).getReadCount(); + } + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, 0.0); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + double[] lks; + lks = Arrays.copyOf(likelihoods,likelihoods.length); + Arrays.sort(lks); + nextVrsBest = lks[9] - lks[8]; + if (ref != 'X') { + int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); + nextVrsRef = lks[9] - likelihoods[index]; + } + } + // we have to calcuate our own + + mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", + locus.getLocation().getContig(), + locus.getLocation().getStart(), + ref, + readDepth, + -1, + locus.getBases(), + nextVrsRef, + nextVrsBest, + likelihoods[0], + likelihoods[1], + likelihoods[2], + likelihoods[3], + likelihoods[4], + likelihoods[5], + likelihoods[6], + likelihoods[7], + likelihoods[8], + likelihoods[9])); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index d1f40f9ab..893b54a27 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -1,17 +1,18 @@ package org.broadinstitute.sting.utils.genotype.glf; +import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.IndelLikelihood; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.genotype.*; import java.io.DataOutputStream; import java.io.File; +import java.util.Arrays; +import java.util.List; /* * Copyright (c) 2009 The Broad Institute * @@ -76,12 +77,12 @@ public class GLFWriter implements GenotypeWriter { /** * add a point genotype to the GLF writer * - * @param contig the name of the contig you're calling in - * @param refBase the reference base, as a char - * @param genomicLoc the location the location on the reference contig - * @param readDepth the read depth at the specified postion - * @param rmsMapQ the root mean square of the mapping quality - * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods + * @param contig the name of the contig you're calling in + * @param refBase the reference base, as a char + * @param genomicLoc the location the location on the reference contig + * @param readDepth the read depth at the specified postion + * @param rmsMapQ the root mean square of the mapping quality + * @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods */ public void addGenotypeCall(SAMSequenceRecord contig, int genomicLoc, @@ -90,14 +91,14 @@ public class GLFWriter implements GenotypeWriter { int readDepth, LikelihoodObject lhValues) { - // check if we've jumped to a new contig + // check if we've jumped to a new contig checkSequence(contig.getSequenceName(), contig.getSequenceLength()); SinglePointCall call = new SinglePointCall(refBase, - genomicLoc - lastPos, - readDepth, - (short) rmsMapQ, - lhValues.toDoubleArray()); + genomicLoc - lastPos, + readDepth, + (short) rmsMapQ, + lhValues.toDoubleArray()); lastPos = genomicLoc; call.write(this.outputBinaryCodec); } @@ -105,20 +106,50 @@ public class GLFWriter implements GenotypeWriter { /** * Add a genotype, given a genotype locus * - * @param locus + * @param locus the genotype called at a locus */ @Override - public void addGenotypeCall(GenotypeOutput locus) { - SSGGenotypeCall call = (SSGGenotypeCall)locus; - LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + public void addGenotypeCall(Genotype locus) { + char ref = locus.getReference(); + + // get likelihood information if available + LikelihoodObject obj; + if (locus instanceof GenotypesBacked) { + obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + } else { + double values[] = new double[10]; + Arrays.fill(values,-255.0); + obj = new LikelihoodObject(values, LikelihoodObject.LIKELIHOOD_TYPE.LOG); + } obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj); + + double rms = -1; + // calculate the RMS mapping qualities and the read depth + if (locus instanceof ReadBacked) { + rms = calculateRMS(((ReadBacked)locus).getReads()); + } + this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,((ReadBacked)locus).getReadCount(),obj); } + + /** + * calculate the rms , given the read pileup + * + * @param reads the read array + * + * @return the rms of the read mapping qualities + */ + private double calculateRMS(List reads) { + int[] qualities = new int[reads.size()]; + for (int i = 0; i < reads.size(); i++) { + qualities[i] = reads.get(i).getMappingQuality(); + } + return MathUtils.rms(qualities); + } /** * add a variable length (indel, deletion, etc) to the genotype writer * - * @param contig the name of the contig you're calling in + * @param contig the name of the contig you're calling in * @param refBase the reference base * @param genomicLoc the location on the reference contig * @param readDepth the read depth at the specified postion @@ -138,19 +169,19 @@ public class GLFWriter implements GenotypeWriter { // check if we've jumped to a new contig checkSequence(contig.getSequenceName(), contig.getSequenceLength()); - + // normalize the two VariableLengthCall call = new VariableLengthCall(refBase, - genomicLoc - lastPos, - readDepth, - (short) rmsMapQ, - firstHomZyg.getLikelihood(), - secondHomZyg.getLikelihood(), - hetLikelihood, - firstHomZyg.getLengthOfIndel(), - firstHomZyg.getIndelSequence(), - secondHomZyg.getLengthOfIndel(), - secondHomZyg.getIndelSequence()); + genomicLoc - lastPos, + readDepth, + (short) rmsMapQ, + firstHomZyg.getLikelihood(), + secondHomZyg.getLikelihood(), + hetLikelihood, + firstHomZyg.getLengthOfIndel(), + firstHomZyg.getIndelSequence(), + secondHomZyg.getLengthOfIndel(), + secondHomZyg.getIndelSequence()); lastPos = genomicLoc; call.write(this.outputBinaryCodec); } @@ -158,8 +189,7 @@ public class GLFWriter implements GenotypeWriter { /** * add a no call to the genotype file, if supported. * - * @param position the position - * + * @param position the position */ @Override public void addNoCall(int position) { @@ -170,12 +200,12 @@ public class GLFWriter implements GenotypeWriter { /** * add a GLF record to the output file * - * @param contigName the contig name + * @param contigName the contig name * @param contigLength the contig length - * @param rec the GLF record to write. + * @param rec the GLF record to write. */ public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { - checkSequence(contigName,contigLength); + checkSequence(contigName, contigLength); rec.write(this.outputBinaryCodec); } @@ -200,12 +230,12 @@ public class GLFWriter implements GenotypeWriter { * check to see if we've jumped to a new contig * * @param sequenceName the name for the sequence - * @param seqLength the sequence length + * @param seqLength the sequence length */ private void checkSequence(String sequenceName, int seqLength) { if ((referenceSequenceName == null) || (!referenceSequenceName.equals(sequenceName))) { if (this.referenceSequenceName != null) { // don't write the record the first time - this.writeEndRecord(); + this.writeEndRecord(); } referenceSequenceName = sequenceName; referenceSequenceLength = seqLength; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index 2d9823e12..879e3f08b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.utils.genotype.tabular; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeOutput; +import org.broadinstitute.sting.utils.genotype.*; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; +import java.util.Arrays; /** @@ -43,7 +43,23 @@ public class TabularLFWriter implements GenotypeWriter { * @param locus the locus to add */ @Override - public void addGenotypeCall(GenotypeOutput locus) { + public void addGenotypeCall(Genotype locus) { + double likelihoods[]; + int readDepth = -1; + double nextVrsBest = 0; + double nextVrsRef = 0; + if (!(locus instanceof GenotypesBacked)) { + likelihoods = new double[10]; + Arrays.fill(likelihoods, Double.MIN_VALUE); + } else { + likelihoods = ((LikelihoodsBacked) locus).getLikelihoods(); + + } + char ref = locus.getReference(); + + if (locus instanceof ReadBacked) { + readDepth = ((ReadBacked)locus).getReadCount(); + } /** * This output is not correct, but I don't we even use this format anymore. If we do, someone * should change this code @@ -51,14 +67,14 @@ public class TabularLFWriter implements GenotypeWriter { outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s", locus.getLocation().toString(), "NOT OUTPUTED", - locus.getReferencebase(), + ref, locus.getBases(), locus.getBases(), -1, -1, - locus.getBestRef(), - locus.getBestNext(), - locus.getReadDepth(), + nextVrsRef, + nextVrsBest, + readDepth, locus.getBases())); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index bf536d849..a5a0882f1 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -180,7 +180,7 @@ public class VCFReader implements Iterator, Iterable { // check to ensure that the column count of tokens is right if (tokens.length != mHeader.getColumnCount()) { - throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length); + throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line); } int index = 0; @@ -261,10 +261,14 @@ public class VCFReader implements Iterator, Iterable { * @param bases the list of bases for this genotype call */ private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { - if (Integer.valueOf(alleleNumber) == 0) + int alleleValue = Integer.valueOf(alleleNumber); + // check to make sure the allele value is within bounds + if (alleleValue < 0 || alleleValue > altAlleles.length) + throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list."); + if (alleleValue == 0) bases.add(String.valueOf(referenceBase)); else - bases.add(altAlleles[Integer.valueOf(alleleNumber) - 1]); + bases.add(altAlleles[alleleValue - 1]); }