changes for the genotype overhaul. Lots of changes focusing on the output side, from single sample genotyper to the output file formats like GLF and geli. Of note the genotype formats are still emitting posteriors as likelihoods; this is the way we've been doing it but it may change soon.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1529 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-09-04 05:31:15 +00:00
parent 2241173fff
commit 3c2ae55859
23 changed files with 604 additions and 1183 deletions

View File

@ -1,359 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class OldAndBustedGenotypeLikelihoods {
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
//protected static final double[] oneHalfMinusData = new double[Byte.MAX_VALUE];
protected static final double log10Of1_3 = log10(1.0 / 3.0);
private boolean filterQ0Bases = true;
static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
double e = pow(10, (qual / -10.0));
oneMinusData[qual] = log10(1.0 - e);
oneHalfMinusDataArachne[qual] = log10(0.5 - e / 2.0);
oneHalfMinusData3Base[qual] = log10(0.5 - e / 2.0 + e / 6.0);
}
}
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
private double getOneHalfMinusQual(final byte qual) {
return oneHalfMinusData[qual];
}
//public double[] likelihoods;
public int coverage;
// The genotype priors;
private double priorHomRef;
private double priorHet;
private double priorHomVar;
private double[] oneHalfMinusData;
public double[] likelihoods;
public boolean isThreeStateErrors() {
return threeStateErrors;
}
public void setThreeStateErrors(boolean threeStateErrors) {
this.threeStateErrors = threeStateErrors;
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
}
private boolean threeStateErrors = false;
private boolean discoveryMode = false;
public static double[] computePriors(double h) {
double[] pdbls = new double[3];
pdbls[0] = 1.0 - (3.0 * h / 2.0);
pdbls[1] = h;
pdbls[2] = h / 2.0;
return pdbls;
}
public final static String[] genotypes = new String[10];
static {
genotypes[0] = "AA";
genotypes[1] = "AC";
genotypes[2] = "AG";
genotypes[3] = "AT";
genotypes[4] = "CC";
genotypes[5] = "CG";
genotypes[6] = "CT";
genotypes[7] = "GG";
genotypes[8] = "GT";
genotypes[9] = "TT";
}
/**
* set the mode to discovery
* @param isInDiscoveryMode
*/
public void setDiscovery(boolean isInDiscoveryMode) {
discoveryMode = isInDiscoveryMode;
}
// Store the 2nd-best base priors for on-genotype primary bases
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
public OldAndBustedGenotypeLikelihoods(double heterozygosity) {
double[] vals = computePriors(heterozygosity);
initialize(vals[0], vals[1], vals[2]);
}
public OldAndBustedGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar);
}
/**
* Are we ignoring Q0 bases during calculations?
* @return
*/
public boolean isFilteringQ0Bases() {
return filterQ0Bases;
}
/**
* Enable / disable filtering of Q0 bases. Enabled by default
*
* @param filterQ0Bases
*/
public void filterQ0Bases(boolean filterQ0Bases) {
this.filterQ0Bases = filterQ0Bases;
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
likelihoods = new double[10];
coverage = 0;
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
}
public double getHomRefPrior() {
return priorHomRef;
}
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHetPrior() {
return priorHet;
}
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
public int add(char ref, char read, byte qual)
{
if (qual <= 0) {
if ( isFilteringQ0Bases() ) {
return 0;
} else {
qual = 1;
}
}
if (coverage == 0)
{
for (int i = 0; i < likelihoods.length; i++)
{
likelihoods[i] = 0;
}
}
//System.out.printf("%s %s%n", OldAndBustedGenotypeLikelihoods.class, this.toString('A'));
for (int i = 0; i < genotypes.length; i++)
{
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
likelihoods[i] += likelihood;
coverage += 1;
}
//System.out.printf("%s %s%n", OldAndBustedGenotypeLikelihoods.class, this.toString('A'));
return 1;
}
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
if (qual == 0) {
// zero quals are wrong
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
}
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
double p_base;
if ((h1 == h2) && (h1 == read)) {
// hom
p_base = getOneMinusQual(qual);
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
// het
p_base = getOneHalfMinusQual(qual);
} else if ( threeStateErrors ) {
// error
//System.out.printf("%s %c %c %b %f %f%n", genotype, h1, h2, h1 != h2, log10Of2_3, log10Of1_3 );
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
} else {
// error
p_base = qual / -10.0;
}
return p_base;
}
public String[] sorted_genotypes;
public double[] sorted_likelihoods;
public void sort() {
Integer[] permutation = Utils.SortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) {
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
}
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
}
public String toString(char ref) {
this.sort();
double sum = 0;
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
for (int i = 0; i < sorted_genotypes.length; i++) {
if (i != 0) {
s = s + " ";
}
s = s + sorted_genotypes[i] + ":" + String.format("%.10f", sorted_likelihoods[i]);
sum += Math.pow(10,sorted_likelihoods[i]);
}
s = s + String.format(" %f", sum);
return s;
}
public void applyPrior(char ref, double[] allele_likelihoods) {
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
if (i == j) {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
} else {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void applyPrior(char ref) {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
this.sort();
}
public double LodVsNextBest() {
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
double ref_likelihood = Double.NaN;
public double LodVsRef(char ref) {
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
return (-1.0 * this.LodVsNextBest());
} else {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
public String BestGenotype() {
this.sort();
return this.sorted_genotypes[0];
}
public double BestPosterior() {
this.sort();
return this.sorted_likelihoods[0];
}
public double RefPosterior(char ref) {
this.LodVsRef(ref);
return this.ref_likelihood;
}
/**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
*
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
* @param ref the reference base
* @param pileup a pileup of the reads, containing the reads and their offsets
*
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
SAMRecord read = pileup.getReads().get(i);
squared += read.getMappingQuality() * read.getMappingQuality();
int offset = pileup.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
add(ref, base, qual);
}
// save off the likelihoods
if (likelihoods == null || likelihoods.length == 0) return null;
// Apply the two calculations
applyPrior(ref);
// lets setup the locus
List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < this.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
}
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
}
}

View File

@ -1,302 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.*;
/**
* @author aaron
* <p/>
* Class GenotypeCallImpl
* <p/>
* The single sample genotypers implementation of the genotype call, which contains
* extra information for the various genotype outputs
*/
public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
// our stored genotype locus
private final String mRefBase;
private final int mPloidy;
private TreeMap<Double, Genotype> mGenotypes = new TreeMap();
private double mLikelihoods[];
private double bestNext = 0;
private double bestRef = 0;
private int readDepth = 0;
private double rmsMapping = 0;
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param discoveryMode are we in discovery mode or genotyping mode
* @param mRefBase the ref base
* @param mPloidy the ploidy
* @param genotypes the genotype
* @param likelihoods the likelihood
* @param pileup the pile-up of reads at the specified locus
*/
public SSGGenotypeCall(boolean discoveryMode, char mRefBase, int mPloidy, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
this.mRefBase = String.valueOf(mRefBase).toUpperCase();
this.mPloidy = mPloidy;
if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0");
String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase();
calculateBestNext(discoveryMode, genotypes, likelihoods, refStr);
this.readDepth = pileup.getReads().size();
rmsMapping = calculateRMS(pileup);
}
/**
* calculate the best and next best, and update the stored confidence scores based on these values
*
* @param discoveryMode are we in discovery mode?
* @param genotypes the genotypes
* @param likelihoods the likelihoods corresponding to the genotypes
* @param refStr the reference string, i.e. reb base[ploidy]
*/
private void calculateBestNext(boolean discoveryMode, List<Genotype> genotypes, double[] likelihoods, String refStr) {
int index = 0;
double ref = 0.0;
double best = Double.NEGATIVE_INFINITY;
double next = Double.NEGATIVE_INFINITY;
for (Genotype g : genotypes) {
if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index];
if (likelihoods[index] > best) {
next = best;
best = likelihoods[index];
} else if (likelihoods[index] > next) next = likelihoods[index];
index++;
}
bestNext = Math.abs(best - next);
bestRef = Math.abs(best - ref);
mLikelihoods = likelihoods;
index = 0;
// reset the confidence based on either the discovery mode or the genotype mode
for (Genotype g : genotypes) {
double val = (discoveryMode) ? Math.abs(likelihoods[index] - ref) : Math.abs(likelihoods[index] - next);
((BasicGenotype) g).setConfidenceScore(new BayesianConfidenceScore(val));
mGenotypes.put(likelihoods[index], g);
index++;
}
}
@Override
public boolean equals(Object other) {
if(other == null)
return false;
if(other instanceof SSGGenotypeCall) {
SSGGenotypeCall otherCall = (SSGGenotypeCall)other;
boolean likelihoodsMatch = true;
for ( int i = 0; i < mLikelihoods.length; i++ ) {
likelihoodsMatch = likelihoodsMatch && MathUtils.compareDoubles(mLikelihoods[i], otherCall.mLikelihoods[i]) == 0;
}
//System.out.printf("likelihoodsMatch = %b%n", likelihoodsMatch);
return this.mRefBase.equals(otherCall.mRefBase) &&
this.mPloidy == otherCall.mPloidy &&
MathUtils.compareDoubles(this.bestNext, otherCall.bestNext) == 0 &&
MathUtils.compareDoubles(this.bestRef, otherCall.bestRef) == 0 &&
this.readDepth == otherCall.readDepth &&
MathUtils.compareDoubles(this.rmsMapping, otherCall.rmsMapping) == 0 &&
likelihoodsMatch;
}
return false;
}
public String toString() {
return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
getLocation(), mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
}
/**
* calculate the rms , given the read pileup
*
* @param pileup
*
* @return
*/
private double calculateRMS(ReadBackedPileup pileup) {
List<SAMRecord> reads = pileup.getReads();
int[] qualities = new int[reads.size()];
for (int i=0; i < reads.size(); i++)
qualities[i] = reads.get(i).getMappingQuality();
return MathUtils.rms(qualities);
}
/**
* gets the reference base
*
* @return the reference base we represent
*/
@Override
public char getReferencebase() {
return mRefBase.charAt(0);
}
/**
* check to see if this called genotype is a variant, i.e. not homozygous reference
*
* @return true if it's not hom ref, false otherwise
*/
@Override
public boolean isVariation() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(getReferencebase());
}
/**
* get the confidence score
*
* @return get the confidence score that we're based on
*/
@Override
public ConfidenceScore getConfidenceScore() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).getConfidenceScore();
}
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
@Override
public String getBases() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).getBases();
}
/**
* get the ploidy
*
* @return the ploidy value
*/
@Override
public int getPloidy() {
return this.mPloidy;
}
/**
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return true if we're homozygous, false otherwise
*/
@Override
public boolean isHom() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHom();
}
/**
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return true if we're het, false otherwise
*/
@Override
public boolean isHet() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHet();
}
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
@Override
public GenomeLoc getLocation() {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).getLocation();
}
/**
* returns true if the genotype is a point genotype, false if it's a indel / deletion
*
* @return true is a SNP
*/
@Override
public boolean isPointGenotype() {
return true;
}
/**
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(ref);
}
/**
* return this genotype as a variant
*
* @return
*/
@Override
public Variant toVariant() {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
/**
* get the genotypes, sorted in asscending order by their ConfidenceScores (the best
* to the worst ConfidenceScores)
*
* @return a list of the genotypes, sorted by ConfidenceScores
*/
@Override
public List<Genotype> getGenotypes() {
List<Genotype> newList = new ArrayList<Genotype>();
newList.addAll(this.mGenotypes.values());
return newList;
}
/**
* get the genotypes sorted lexigraphically
*
* @return a list of the genotypes sorted lexi
*/
@Override
public List<Genotype> getLexigraphicallySortedGenotypes() {
List<Genotype> newList = new ArrayList<Genotype>();
newList.addAll(this.mGenotypes.values());
Collections.sort(newList, new LexigraphicalComparator());
return newList;
}
/**
* return the likelihoods as a double array, in lexographic order
*
* @return the likelihoods
*/
public double[] getLikelihoods() {
return this.mLikelihoods;
}
public int getReadDepth() {
return readDepth;
}
public double getRmsMapping() {
return rmsMapping;
}
public double getBestNext() {
return bestNext;
}
public double getBestRef() {
return bestRef;
}
}

View File

@ -0,0 +1,279 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.Arrays;
import java.util.List;
/**
* @author aaron
* <p/>
* Class SSGenotypeCall
* <p/>
* The single sample implementation of the genotype interface, which contains
* extra information for the various genotype outputs
*/
public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked {
private final char mRefBase;
private final GenotypeLikelihoods mGenotypeLikelihoods;
// the next two values are filled in on-demand. Default to -1 since they can never be negitive
private final GenomeLoc mLocation;
private final ReadBackedPileup mPileup;
// if this is null, we were constructed with the intention that we'd represent the best genotype
private DiploidGenotype mGenotype = null;
// which genotype to compare to; if we're in discovery mode it's the ref allele, otherwise it's the next best
private DiploidGenotype mCompareTo = null;
// are we best vrs ref or best vrs next - for internal consumption only
private final boolean mBestVrsRef;
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param discovery are we representing the best vrs next or best vrs ref
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
*/
public SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
mBestVrsRef = discovery;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
mPileup = pileup;
}
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param discovery are we representing the best vrs next or best vrs ref
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
*/
SSGenotypeCall(boolean discovery, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
mBestVrsRef = discovery;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
mGenotype = genotype;
mPileup = pileup;
}
@Override
public boolean equals(Object other) {
if (other == null)
return false;
if (other instanceof SSGenotypeCall) {
SSGenotypeCall otherCall = (SSGenotypeCall) other;
if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) {
return false;
}
if (!this.mGenotype.equals(otherCall.mGenotype))
return false;
return (this.mRefBase == otherCall.mRefBase) &&
this.mPileup.equals(mPileup);
}
return false;
}
public String toString() {
return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f likelihoods=%s",
getLocation(), mRefBase, mPileup.getReads().size(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
}
/**
* get the confidence we have
*
* @return get the one minus error value
*/
@Override
public double getLog10PError() {
getBestGenotype();
getAltGenotype();
return Math.abs(mGenotypeLikelihoods.getPosterior(mGenotype) - mGenotypeLikelihoods.getPosterior(mCompareTo));
}
/**
* get the best genotype
*/
public DiploidGenotype getBestGenotype() {
if (mGenotype == null) {
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
}
return mGenotype;
}
/**
* get the alternate genotype
*/
public DiploidGenotype getAltGenotype() {
if (mCompareTo == null) {
if (this.mBestVrsRef) {
mCompareTo = DiploidGenotype.valueOf(Utils.dupString(this.getReference(),2));
} else {
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
mCompareTo = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
}
}
return mCompareTo;
}
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
@Override
public String getBases() {
return getBestGenotype().toString();
}
/**
* get the ploidy
*
* @return the ploidy value
*/
@Override
public int getPloidy() {
return 2;
}
/**
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return true if we're homozygous, false otherwise
*/
@Override
public boolean isHom() {
return getBestGenotype().isHom();
}
/**
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return true if we're het, false otherwise
*/
@Override
public boolean isHet() {
return !isHom();
}
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
@Override
public GenomeLoc getLocation() {
return this.mLocation;
}
/**
* returns true if the genotype is a point genotype, false if it's a indel / deletion
*
* @return true is a SNP
*/
@Override
public boolean isPointGenotype() {
return true;
}
/**
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
return !Utils.dupString(this.getReference(),2).equals(getBestGenotype().toString());
}
/**
* return this genotype as a variant
*
* @return
*/
public Variant toVariation() {
return null; // the next step is to implement the variant system
}
/**
* return the likelihoods as a double array, in lexographic order
*
* @return the likelihoods
*/
public double[] getProbabilities() {
return this.mGenotypeLikelihoods.getPosteriors();
}
/**
* get the SAM records that back this genotype call
*
* @return a list of SAM records
*/
public List<SAMRecord> getReads() {
return this.mPileup.getReads();
}
/**
* get the count of reads
*
* @return the number of reads we're backed by
*/
public int getReadCount() {
return this.mPileup.getReads().size();
}
/**
* get the reference character
*
* @return
*/
public char getReference() {
return this.mRefBase;
}
/**
* get the likelihoods
*
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x) {
return new SSGenotypeCall(mBestVrsRef,mLocation,mRefBase,mGenotypeLikelihoods,mPileup,x);
}
/**
* get the likelihood information for this
*
* @return
*/
public double[] getLikelihoods() {
// TODO: this is wrong, obviously, but we've kept it for now to stay backward compatible with previous calls
return this.mGenotypeLikelihoods.getPosteriors();
}
}

View File

@ -5,23 +5,18 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import java.io.File;
import java.util.List;
import java.util.ArrayList;
@ReadFilters(ZeroMappingQualityReadFilter.class)
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, GenotypeWriter> {
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
public File VARIANTS_FILE = null;
@ -74,7 +69,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* @param refContext the reference base
* @param context contextual information around the locus
*/
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
public SSGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
char ref = refContext.getBase();
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
@ -86,14 +81,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
gl.add(pileup, true);
gl.validate();
// lets setup the locus
List<Genotype> lst = new ArrayList<Genotype>();
for ( DiploidGenotype g : DiploidGenotype.values() ) {
lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(gl.getLikelihood(g))));
}
//System.out.printf("At %s%n", new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, g.getPosteriors(), pileup));
return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, gl.getPosteriors(), pileup);
return new SSGenotypeCall(!GENOTYPE, context.getLocation(), ref,gl, pileup);
}
/**
@ -138,11 +126,9 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
*
* @return an empty string
*/
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) {
if (call != null && (GENOTYPE || call.isVariation())) {
//System.out.printf("Score %s%n", call.getConfidenceScore());
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) {
//System.out.printf("Call %s%n", call);
public GenotypeWriter reduce(SSGenotypeCall call, GenotypeWriter sum) {
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
if (call.getLog10PError() >= LOD_THRESHOLD) {
sum.addGenotypeCall(call);
}
}

View File

@ -6,14 +6,17 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ListUtils;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
@ -77,9 +80,9 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
GenotypeCall call = SSG.map(tracker, ref, subContext);
SSGenotypeCall call = SSG.map(tracker, ref, subContext);
String callType = (call.isVariation()) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
if (call != null) {
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
}
@ -104,27 +107,52 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
}
// a method to support getting the geli string, since the AlleleFrequencyEstimate is going away
public String toGeliString (GenotypeCall locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
call.getLocation().getContig(),
call.getLocation().getStart(),
call.getReferencebase(),
call.getReadDepth(),
-1,
call.getBases(),
call.getBestRef(),
call.getBestNext(),
call.getLikelihoods()[0],
call.getLikelihoods()[1],
call.getLikelihoods()[2],
call.getLikelihoods()[3],
call.getLikelihoods()[4],
call.getLikelihoods()[5],
call.getLikelihoods()[6],
call.getLikelihoods()[7],
call.getLikelihoods()[8],
call.getLikelihoods()[9]);
public String toGeliString (Genotype locus) {
double likelihoods[];
int readDepth = -1;
double nextVrsBest = 0;
double nextVrsRef = 0;
char ref = locus.getReference();
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();
}
if (!(locus instanceof GenotypesBacked)) {
likelihoods = new double[10];
Arrays.fill(likelihoods, 0.0);
} else {
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
double[] lks;
lks = Arrays.copyOf(likelihoods,likelihoods.length);
Arrays.sort(lks);
nextVrsBest = lks[9] - lks[8];
if (ref != 'X') {
int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
nextVrsRef = lks[9] - likelihoods[index];
}
}
// we have to calcuate our own
return new String(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(),
locus.getLocation().getStart(),
ref,
readDepth,
-1,
locus.getBases(),
nextVrsRef,
nextVrsBest,
likelihoods[0],
likelihoods[1],
likelihoods[2],
likelihoods[3],
likelihoods[4],
likelihoods[5],
likelihoods[6],
likelihoods[7],
likelihoods[8],
likelihoods[9]));
}
}

View File

@ -1,21 +1,19 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.io.PrintWriter;
import java.util.Set;
import java.util.List;
import java.util.LinkedList;
import java.util.ArrayList;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileWriter;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
@ -252,11 +250,11 @@ public class ArtificialPoolContext {
}
public String genotypeAndConfidenceToString(int group, String spacer) {
GenotypeCall call = this.getGenotypeCall(group);
return (call.getGenotypes() + spacer + call.getConfidenceScore().toString());
Genotype call = this.getGenotype(group);
return (call.getBases() + spacer + call.getLog10PError()); // TODO: fix me
}
public GenotypeCall getGenotypeCall(int group) {
public Genotype getGenotype(int group) {
AlignmentContext alicon = this.getAlignmentContext();
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
return ssg.map(this.getRefMetaDataTracker(),this.getReferenceContext(),

View File

@ -1,169 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
/**
* @author aaron
* <p/>
* Class BasicGenotype
* <p/>
* A basic implementation of the genotype interface
*/
public class BasicGenotype implements Genotype {
// the bases that represent this genotype
private String mBases = "";
// the ploidy, assume 2 unless told otherwise
private int mPloidy = 2;
// the confidence score
protected ConfidenceScore mConfidenceScore;
// our location
private GenomeLoc mLoc;
/**
* construct a genotypeLikelihood, given the bases, the confidence score, and the ploidy
*
* @param loc the location of the genotype
* @param bases the bases that make up this genotype
* @param ploidy the ploidy of this genotype
* @param score the confidence score
*/
public BasicGenotype(GenomeLoc loc, String bases, int ploidy, ConfidenceScore score) {
this.mPloidy = ploidy;
if (bases.length() != ploidy) {
throw new IllegalArgumentException("The number of bases should match the ploidy");
}
this.mBases = bases;
this.mConfidenceScore = score;
this.mLoc = loc;
}
/**
* construct a genotypeLikelihood, given the bases and the confidence score, and assume the
* ploidy is 2.
*
* @param loc the location of the genotype
* @param bases the bases that make up this genotype
* @param score the confidence score
*/
public BasicGenotype(GenomeLoc loc, String bases, ConfidenceScore score) {
if (bases.length() != mPloidy) {
throw new IllegalArgumentException("The number of bases should match the ploidy");
}
this.mBases = bases;
this.mConfidenceScore = score;
this.mLoc = loc;
}
/**
* get the confidence score
*
* @return get the confidence score that we're based on
*/
public ConfidenceScore getConfidenceScore() {
return this.mConfidenceScore;
}
/**
* get the bases that represent this
*
* @return the bases, as a string
*/
public String getBases() {
return mBases;
}
/**
* get the ploidy
*
* @return the ploidy value
*/
public int getPloidy() {
return mPloidy;
}
/**
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return true if we're homozygous, false otherwise
*/
public boolean isHom() {
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
char first = mBases.charAt(0);
for (char c : mBases.toCharArray()) {
if (c != first) return false;
}
return true;
}
/**
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return true if we're het, false otherwise
*/
public boolean isHet() {
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
char first = mBases.charAt(0);
for (char c : mBases.toCharArray()) {
if (c != first) return true;
}
return false;
}
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
public GenomeLoc getLocation() {
return mLoc;
}
/**
* returns true if the genotype is a point genotype, false if it's a indel / deletion
*
* @return true is a SNP
*/
@Override
public boolean isPointGenotype() {
return true;
}
/**
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base
*
* @return true if we're a variant
*/
@Override
public boolean isVariant(char ref) {
String ret = Utils.dupString(ref, this.getPloidy());
return !this.getBases().equals(ret);
}
/**
* return this genotype as a variant
*
* @return
*/
@Override
public Variant toVariant() {
return null;
}
/**
* set the confidence score
* @param confidenceScore
*/
public void setConfidenceScore(ConfidenceScore confidenceScore) {
this.mConfidenceScore = confidenceScore;
}
}

View File

@ -1,8 +1,6 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
import org.broadinstitute.sting.utils.genotype.Variant;
/**
* @author aaron
@ -13,11 +11,11 @@ import org.broadinstitute.sting.utils.genotype.Variant;
*/
public interface Genotype {
/**
* get the confidence score
* get the -1 * (log 10 of the error value)
*
* @return get the confidence score that we're based on
* @return the log based error estimate
*/
public ConfidenceScore getConfidenceScore();
public double getLog10PError();
/**
* get the bases that represent this
@ -71,16 +69,16 @@ public interface Genotype {
public boolean isVariant(char ref);
/**
* return this genotype as a variant
*
* @return
* get the reference base.
* @return a character, representing the reference base
*/
public Variant toVariant();
public char getReference();
/**
* return a readable string representation of this genotype
* return this genotype as a variant
*
* @return
* @return the variant
*/
public String toString();
public Variant toVariation();
}

View File

@ -1,45 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* <p/>
* Interface GenotypeOutputFormat
* <p/>
* This interface is in adition to the GenotypeCall interface,
* but adds the required functionality that output (GenotypeWriter) interfaces
* need. It's fair game to return 0 or any value for these fields, but the methods
* are all used by various output writers
*/
public interface GenotypeOutput extends GenotypeCall {
/**
* return the likelihoods as a double array, in lexographic order
*
* @return the likelihoods
*/
public double[] getLikelihoods();
/**
* get the depth of the reads at this location
* @return the depth
*/
public int getReadDepth();
/**
* get the rms mapping qualities
* @return
*/
public double getRmsMapping();
/**
* get the best to the next best genotype
* @return
*/
public double getBestNext();
/**
* get the best compaired to the reference score
* @return
*/
public double getBestRef();
}

View File

@ -38,7 +38,7 @@ public interface GenotypeWriter {
* Add a genotype, given a genotype locus
* @param locus the locus to add
*/
public void addGenotypeCall(GenotypeOutput locus);
public void addGenotypeCall(Genotype locus);
/**
* add a no call to the genotype file, if supported.

View File

@ -0,0 +1,19 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
/**
* @author aaron
*
* Interface LikelihoodsBacked
*
* indicate that this genotype was called, and contains the other genotypes with their
* associtated information.
*/
public interface GenotypesBacked {
/**
* get the likelihoods
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x);
}

View File

@ -60,11 +60,6 @@ public class LikelihoodObject {
NEGITIVE_LOG, LOG, RAW;
}
// our qhet and qstar values; wait, what?
// TODO: are these really needed here? We have them to support the tabular output format only.
//private double qhat;
//private double qstar;
// our liklihood storage type
protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG;

View File

@ -0,0 +1,17 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* Interface LikelihoodsBacked
*
* this interface indicates that the genotype is
* backed up by genotype likelihood information.
*/
public interface LikelihoodsBacked {
/**
* get the likelihood information for this
* @return
*/
public double[] getLikelihoods();
}

View File

@ -0,0 +1,26 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMRecord;
import java.util.List;
/**
* @author aaron
*
* Interface ReadBacked
*
* This interface indicates that a genotype or variant is backed by reads
*/
public interface ReadBacked {
/**
* get the SAM records that back this genotype call
* @return a list of SAM records
*/
public List<SAMRecord> getReads();
/**
* get the count of reads
* @return the number of reads we're backed by
*/
public int getReadCount();
}

View File

@ -1,8 +1,6 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
/**
* @author aaron
* <p/>
@ -23,13 +21,6 @@ public interface Variant {
*/
public double getNonRefAlleleFrequency();
/**
* get the confidence score for this variance
*
* @return the confidence score
*/
public ConfidenceScore getConfidenceScore();
/** @return the VARIANT_TYPE of the current variant */
public VARIANT_TYPE getType();

View File

@ -1,36 +0,0 @@
package org.broadinstitute.sting.utils.genotype.confidence;
/**
*
* @author aaron
*
* Class LikelihoodConfidenceScore
*
* A descriptions should go here. Blame aaron if it's missing.
*/
public class BayesianConfidenceScore extends ConfidenceScore {
public BayesianConfidenceScore(double score) {
super(score);
}
/**
* return the confidence method we're employing, UNKNOWN is an option
*
* @return the method of confidence we represent
*/
@Override
public SCORE_METHOD getConfidenceType() {
return SCORE_METHOD.LOD_SCORE;
}
/**
* get the confidence score, normalized to the range of [0-1]
*
* @return a confidence score
*/
@Override
public double normalizedConfidenceScore() {
return Math.pow(10,this.getScore());
}
}

View File

@ -1,70 +0,0 @@
package org.broadinstitute.sting.utils.genotype.confidence;
/**
* @author aaron
* <p/>
* Class ConfidenceScore
* <p/>
* this class represents the confidence in a genotype, and the method we used to obtain it
*/
public abstract class ConfidenceScore implements Comparable<ConfidenceScore> {
// the general error of a floating point value
private static final Double EPSILON = 1.0e-15;
/**
* the method we use to generate this confidence
*/
public enum SCORE_METHOD {
LOD_SCORE, CHIP, UNKNOWN;
}
private Double mScore;
public ConfidenceScore(double score) {
this.mScore = score;
}
/**
* compare this ConfidenceScore to another, throwing an exception if they're not the same scoring method
* @param o the other confidence score if
* @return 0 if equal
*/
@Override
public int compareTo(ConfidenceScore o) {
if (o.getConfidenceType() != this.getConfidenceType()) {
throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods");
}
double diff = mScore - o.mScore;
if (Math.abs(diff) < (EPSILON * Math.abs(mScore)))
return 0;
else if (diff < 0)
return -1;
else
return 1;
}
/**
* get the score
* @return a double representing the genotype score
*/
public Double getScore() {
return mScore;
}
/**
* return the confidence method we're employing, UNKNOWN is an option
* @return the method of confidence we represent
*/
public abstract SCORE_METHOD getConfidenceType();
/**
* get the confidence score, normalized to the range of [0-1]
* @return a confidence score
*/
public abstract double normalizedConfidenceScore();
public String toString() {
return String.format("%.2f %s", getScore(), getConfidenceType().toString());
}
}

View File

@ -1,22 +0,0 @@
package org.broadinstitute.sting.utils.genotype.confidence;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.Comparator;
/**
*
* @author aaron
*
* Class ConfidenceScoreComparator
*
* A descriptions should go here. Blame aaron if it's missing.
*/
public class ConfidenceScoreComparator implements Comparator<Genotype> {
@Override
public int compare(Genotype genotype, Genotype genotype1) {
return genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore());
}
}

View File

@ -3,14 +3,12 @@ package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.File;
import java.util.Arrays;
/*
@ -68,14 +66,11 @@ public class GeliAdapter implements GenotypeWriter {
* @param contig the contig you're calling in
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
*/
public void addGenotypeCall(SAMSequenceRecord contig,
int position,
float rmsMapQuals,
char referenceBase,
int readDepth,
LikelihoodObject likelihoods) {
writer.addGenotypeLikelihoods(likelihoods.convert(writer.getFileHeader(), 1, position, (byte) referenceBase));
}
@ -102,10 +97,27 @@ public class GeliAdapter implements GenotypeWriter {
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(GenotypeOutput locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj);
public void addGenotypeCall(Genotype locus) {
double likelihoods[];
int readDepth = -1;
double nextVrsBest = 0;
double nextVrsRef = 0;
if (!(locus instanceof GenotypesBacked)) {
likelihoods = new double[10];
Arrays.fill(likelihoods, Double.MIN_VALUE);
} else {
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
}
char ref = locus.getReference();
SSGenotypeCall call = (SSGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
(int)locus.getLocation().getStart(),
ref,
obj);
}
/**

View File

@ -1,23 +1,23 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.Arrays;
/**
*
* @author aaron
*
* Class GeliTextWriter
*
* A descriptions should go here. Blame aaron if it's missing.
* @author aaron
* <p/>
* Class GeliTextWriter
* <p/>
* write out the geli text file format containing genotype information
*/
public class GeliTextWriter implements GenotypeWriter {
// where we write to
@ -25,6 +25,7 @@ public class GeliTextWriter implements GenotypeWriter {
/**
* create a geli text writer
*
* @param file the file to write to
*/
public GeliTextWriter(File file) {
@ -48,28 +49,52 @@ public class GeliTextWriter implements GenotypeWriter {
*
* @param locus the locus to add
*/
public void addGenotypeCall(GenotypeOutput locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
public void addGenotypeCall(Genotype locus) {
double likelihoods[];
int readDepth = -1;
double nextVrsBest = 0;
double nextVrsRef = 0;
mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(),
locus.getLocation().getStart(),
locus.getReferencebase(),
call.getReadDepth(),
-1,
locus.getBases(),
call.getBestRef(),
call.getBestNext(),
call.getLikelihoods()[0],
call.getLikelihoods()[1],
call.getLikelihoods()[2],
call.getLikelihoods()[3],
call.getLikelihoods()[4],
call.getLikelihoods()[5],
call.getLikelihoods()[6],
call.getLikelihoods()[7],
call.getLikelihoods()[8],
call.getLikelihoods()[9]));
char ref = locus.getReference();
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();
}
if (!(locus instanceof GenotypesBacked)) {
likelihoods = new double[10];
Arrays.fill(likelihoods, 0.0);
} else {
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
double[] lks;
lks = Arrays.copyOf(likelihoods,likelihoods.length);
Arrays.sort(lks);
nextVrsBest = lks[9] - lks[8];
if (ref != 'X') {
int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
nextVrsRef = lks[9] - likelihoods[index];
}
}
// we have to calcuate our own
mWriter.println(String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(),
locus.getLocation().getStart(),
ref,
readDepth,
-1,
locus.getBases(),
nextVrsRef,
nextVrsBest,
likelihoods[0],
likelihoods[1],
likelihoods[2],
likelihoods[3],
likelihoods[4],
likelihoods[5],
likelihoods[6],
likelihoods[7],
likelihoods[8],
likelihoods[9]));
}
/**

View File

@ -1,17 +1,18 @@
package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.DataOutputStream;
import java.io.File;
import java.util.Arrays;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -76,12 +77,12 @@ public class GLFWriter implements GenotypeWriter {
/**
* add a point genotype to the GLF writer
*
* @param contig the name of the contig you're calling in
* @param refBase the reference base, as a char
* @param genomicLoc the location the location on the reference contig
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
* @param contig the name of the contig you're calling in
* @param refBase the reference base, as a char
* @param genomicLoc the location the location on the reference contig
* @param readDepth the read depth at the specified postion
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
*/
public void addGenotypeCall(SAMSequenceRecord contig,
int genomicLoc,
@ -90,14 +91,14 @@ public class GLFWriter implements GenotypeWriter {
int readDepth,
LikelihoodObject lhValues) {
// check if we've jumped to a new contig
// check if we've jumped to a new contig
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
SinglePointCall call = new SinglePointCall(refBase,
genomicLoc - lastPos,
readDepth,
(short) rmsMapQ,
lhValues.toDoubleArray());
genomicLoc - lastPos,
readDepth,
(short) rmsMapQ,
lhValues.toDoubleArray());
lastPos = genomicLoc;
call.write(this.outputBinaryCodec);
}
@ -105,20 +106,50 @@ public class GLFWriter implements GenotypeWriter {
/**
* Add a genotype, given a genotype locus
*
* @param locus
* @param locus the genotype called at a locus
*/
@Override
public void addGenotypeCall(GenotypeOutput locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
public void addGenotypeCall(Genotype locus) {
char ref = locus.getReference();
// get likelihood information if available
LikelihoodObject obj;
if (locus instanceof GenotypesBacked) {
obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
} else {
double values[] = new double[10];
Arrays.fill(values,-255.0);
obj = new LikelihoodObject(values, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
}
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRmsMapping(),locus.getReferencebase(),locus.getReadDepth(),obj);
double rms = -1;
// calculate the RMS mapping qualities and the read depth
if (locus instanceof ReadBacked) {
rms = calculateRMS(((ReadBacked)locus).getReads());
}
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,((ReadBacked)locus).getReadCount(),obj);
}
/**
* calculate the rms , given the read pileup
*
* @param reads the read array
*
* @return the rms of the read mapping qualities
*/
private double calculateRMS(List<SAMRecord> reads) {
int[] qualities = new int[reads.size()];
for (int i = 0; i < reads.size(); i++) {
qualities[i] = reads.get(i).getMappingQuality();
}
return MathUtils.rms(qualities);
}
/**
* add a variable length (indel, deletion, etc) to the genotype writer
*
* @param contig the name of the contig you're calling in
* @param contig the name of the contig you're calling in
* @param refBase the reference base
* @param genomicLoc the location on the reference contig
* @param readDepth the read depth at the specified postion
@ -138,19 +169,19 @@ public class GLFWriter implements GenotypeWriter {
// check if we've jumped to a new contig
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
// normalize the two
VariableLengthCall call = new VariableLengthCall(refBase,
genomicLoc - lastPos,
readDepth,
(short) rmsMapQ,
firstHomZyg.getLikelihood(),
secondHomZyg.getLikelihood(),
hetLikelihood,
firstHomZyg.getLengthOfIndel(),
firstHomZyg.getIndelSequence(),
secondHomZyg.getLengthOfIndel(),
secondHomZyg.getIndelSequence());
genomicLoc - lastPos,
readDepth,
(short) rmsMapQ,
firstHomZyg.getLikelihood(),
secondHomZyg.getLikelihood(),
hetLikelihood,
firstHomZyg.getLengthOfIndel(),
firstHomZyg.getIndelSequence(),
secondHomZyg.getLengthOfIndel(),
secondHomZyg.getIndelSequence());
lastPos = genomicLoc;
call.write(this.outputBinaryCodec);
}
@ -158,8 +189,7 @@ public class GLFWriter implements GenotypeWriter {
/**
* add a no call to the genotype file, if supported.
*
* @param position the position
*
* @param position the position
*/
@Override
public void addNoCall(int position) {
@ -170,12 +200,12 @@ public class GLFWriter implements GenotypeWriter {
/**
* add a GLF record to the output file
*
* @param contigName the contig name
* @param contigName the contig name
* @param contigLength the contig length
* @param rec the GLF record to write.
* @param rec the GLF record to write.
*/
public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
checkSequence(contigName,contigLength);
checkSequence(contigName, contigLength);
rec.write(this.outputBinaryCodec);
}
@ -200,12 +230,12 @@ public class GLFWriter implements GenotypeWriter {
* check to see if we've jumped to a new contig
*
* @param sequenceName the name for the sequence
* @param seqLength the sequence length
* @param seqLength the sequence length
*/
private void checkSequence(String sequenceName, int seqLength) {
if ((referenceSequenceName == null) || (!referenceSequenceName.equals(sequenceName))) {
if (this.referenceSequenceName != null) { // don't write the record the first time
this.writeEndRecord();
this.writeEndRecord();
}
referenceSequenceName = sequenceName;
referenceSequenceLength = seqLength;

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.utils.genotype.tabular;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeOutput;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;
/**
@ -43,7 +43,23 @@ public class TabularLFWriter implements GenotypeWriter {
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(GenotypeOutput locus) {
public void addGenotypeCall(Genotype locus) {
double likelihoods[];
int readDepth = -1;
double nextVrsBest = 0;
double nextVrsRef = 0;
if (!(locus instanceof GenotypesBacked)) {
likelihoods = new double[10];
Arrays.fill(likelihoods, Double.MIN_VALUE);
} else {
likelihoods = ((LikelihoodsBacked) locus).getLikelihoods();
}
char ref = locus.getReference();
if (locus instanceof ReadBacked) {
readDepth = ((ReadBacked)locus).getReadCount();
}
/**
* This output is not correct, but I don't we even use this format anymore. If we do, someone
* should change this code
@ -51,14 +67,14 @@ public class TabularLFWriter implements GenotypeWriter {
outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
locus.getLocation().toString(),
"NOT OUTPUTED",
locus.getReferencebase(),
ref,
locus.getBases(),
locus.getBases(),
-1,
-1,
locus.getBestRef(),
locus.getBestNext(),
locus.getReadDepth(),
nextVrsRef,
nextVrsBest,
readDepth,
locus.getBases()));
}

View File

@ -180,7 +180,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
// check to ensure that the column count of tokens is right
if (tokens.length != mHeader.getColumnCount()) {
throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length);
throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
}
int index = 0;
@ -261,10 +261,14 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param bases the list of bases for this genotype call
*/
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<String> bases) {
if (Integer.valueOf(alleleNumber) == 0)
int alleleValue = Integer.valueOf(alleleNumber);
// check to make sure the allele value is within bounds
if (alleleValue < 0 || alleleValue > altAlleles.length)
throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list.");
if (alleleValue == 0)
bases.add(String.valueOf(referenceBase));
else
bases.add(altAlleles[Integer.valueOf(alleleNumber) - 1]);
bases.add(altAlleles[alleleValue - 1]);
}