some initial changes from the first review of the genotype redesign, more to come.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1338 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
feb7238f10
commit
9cd53d3273
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.ListUtils;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -90,7 +91,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
LocusContext subContext = new LocusContext(context.getLocation(), sub_reads, sub_offsets);
|
||||
GenotypeCall call = SSG.map(tracker, ref, subContext);
|
||||
|
||||
String callType = (call.isVariation()) ? ((call.getBestVrsRef().first.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||
String callType = (call.isVariation()) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||
if (call != null) {
|
||||
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
|
||||
}
|
||||
|
|
@ -116,37 +117,26 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
|
||||
// a method to support getting the geli string, since the AlleleFrequencyEstimate is going away
|
||||
public String toGeliString (GenotypeCall locus) {
|
||||
if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)");
|
||||
|
||||
|
||||
// this is to perserve the format string that we used to use
|
||||
double[] likelihoods = new double[10];
|
||||
int index = 0;
|
||||
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
|
||||
for (Genotype G: lt) {
|
||||
likelihoods[index] = G.getLikelihood();
|
||||
index++;
|
||||
}
|
||||
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
locus.getLocation().getContig(),
|
||||
locus.getLocation().getStart(),
|
||||
locus.getReferencebase(),
|
||||
locus.getReadDepth(),
|
||||
call.getLocation().getContig(),
|
||||
call.getLocation().getStart(),
|
||||
call.getReferencebase(),
|
||||
call.getReadDepth(),
|
||||
-1,
|
||||
locus.getGenotypes().get(0).getBases(),
|
||||
locus.getBestVrsRef().second.getScore(),
|
||||
locus.getBestVrsNext().second.getScore(),
|
||||
likelihoods[0],
|
||||
likelihoods[1],
|
||||
likelihoods[2],
|
||||
likelihoods[3],
|
||||
likelihoods[4],
|
||||
likelihoods[5],
|
||||
likelihoods[6],
|
||||
likelihoods[7],
|
||||
likelihoods[8],
|
||||
likelihoods[9]);
|
||||
call.getBases(),
|
||||
call.getBestRef(),
|
||||
call.getBestNext(),
|
||||
call.getLikelihoods()[0],
|
||||
call.getLikelihoods()[1],
|
||||
call.getLikelihoods()[2],
|
||||
call.getLikelihoods()[3],
|
||||
call.getLikelihoods()[4],
|
||||
call.getLikelihoods()[5],
|
||||
call.getLikelihoods()[6],
|
||||
call.getLikelihoods()[7],
|
||||
call.getLikelihoods()[8],
|
||||
call.getLikelihoods()[9]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,19 +10,16 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.playground.utils.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BasicPileup;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWriter> {
|
||||
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
|
||||
// Control output settings
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
|
||||
|
|
@ -126,22 +123,21 @@ public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWri
|
|||
*
|
||||
* @return an AlleleFrequencyEstimate object
|
||||
*/
|
||||
public GenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
rationalizeSampleName(context.getReads().get(0));
|
||||
if (context.getLocation().getStart() == 73) {
|
||||
int stop = 1;
|
||||
}
|
||||
GenotypeLocus genotype = getGenotype(tracker, Character.toUpperCase(ref), context, sampleName);
|
||||
GenotypeCall call = null;
|
||||
if (genotype != null) {
|
||||
call = new GenotypeCallImpl(genotype, ref,
|
||||
new ConfidenceScore(this.LOD_THRESHOLD, (GENOTYPE ? ConfidenceScore.SCORE_METHOD.BEST_NEXT : ConfidenceScore.SCORE_METHOD.BEST_REF)));
|
||||
metricsOut.nextPosition(call, tracker);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
GenotypeLikelihoods G = callGenotype(tracker);
|
||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
if (geno != null) {
|
||||
metricsOut.nextPosition(geno, tracker);
|
||||
}
|
||||
if (!SUPPRESS_METRICS) {
|
||||
metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL);
|
||||
}
|
||||
return call;
|
||||
return geno;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -172,25 +168,6 @@ public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWri
|
|||
return sampleName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the allele frequency of the underlying genotype at the given locus.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param ref the reference base
|
||||
* @param context contextual information around the locus
|
||||
* @param sample_name the name of the sample
|
||||
*
|
||||
* @return the allele frequency estimate
|
||||
*/
|
||||
private GenotypeLocus getGenotype(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
// Handle single-base polymorphisms.
|
||||
GenotypeLikelihoods G = callGenotype(tracker);
|
||||
GenotypeLocus geno = G.callGenotypes(tracker, ref, pileup);
|
||||
|
||||
return geno;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls the underlying, single locus genotype of the sample
|
||||
*
|
||||
|
|
@ -211,26 +188,6 @@ public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWri
|
|||
return G;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the likelihood of an indel at this locus.
|
||||
*
|
||||
* @param context contextual information around the locus
|
||||
* @param reads the reads that overlap this locus
|
||||
* @param offsets the offsets per read that identify the base at this locus
|
||||
*
|
||||
* @return the likelihood of the indel at this location
|
||||
*/
|
||||
private IndelLikelihood callIndel(LocusContext context, List<SAMRecord> reads, List<Integer> offsets) {
|
||||
String[] indels = BasicPileup.indelPileup(reads, offsets);
|
||||
IndelLikelihood indelCall = new IndelLikelihood(indels, 1e-4);
|
||||
|
||||
if (!indelCall.getType().equals("ref")) {
|
||||
System.out.printf("INDEL %s %s\n", context.getLocation(), indelCall);
|
||||
}
|
||||
|
||||
return indelCall;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether we're at a Hapmap site
|
||||
*
|
||||
|
|
@ -271,10 +228,9 @@ public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWri
|
|||
*
|
||||
* @return an empty string
|
||||
*/
|
||||
public GenotypeWriter reduce(GenotypeCall call, GenotypeWriter sum) {
|
||||
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) {
|
||||
if (call != null && call.isVariation()) {
|
||||
if ((GENOTYPE && call.getBestVrsNext().second.getScore() > LOD_THRESHOLD) ||
|
||||
(call.getBestVrsRef().second.getScore() > LOD_THRESHOLD))
|
||||
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD)
|
||||
sum.addGenotypeCall(call);
|
||||
}
|
||||
return sum;
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -56,8 +57,9 @@ public class AlleleMetrics {
|
|||
this.LOD_cutoff = lodThresold;
|
||||
}
|
||||
|
||||
public void nextPosition(GenotypeCall call, RefMetaDataTracker tracker) {
|
||||
num_loci_total += 1;
|
||||
public void nextPosition(GenotypeCall cl, RefMetaDataTracker tracker) {
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)cl;
|
||||
num_loci_total += 1;
|
||||
|
||||
boolean is_dbSNP_SNP = false;
|
||||
boolean has_hapmap_chip_genotype = false;
|
||||
|
|
@ -80,10 +82,10 @@ public class AlleleMetrics {
|
|||
}
|
||||
}
|
||||
}
|
||||
Pair<Genotype, ConfidenceScore> result = call.getBestVrsRef();
|
||||
if (Math.abs(call.getBestVrsNext().second.getScore()) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
double result = call.getBestRef();
|
||||
if (Math.abs(call.getBestNext()) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
|
||||
if (call.isVariation() && result.second.getScore() >= LOD_cutoff)
|
||||
if (call.isVariation() && result >= LOD_cutoff)
|
||||
{
|
||||
// Confident variant.
|
||||
|
||||
|
|
@ -101,7 +103,7 @@ public class AlleleMetrics {
|
|||
String hapmap_genotype = hapmap_chip_genotype.getFeature();
|
||||
long refs=0, alts=0;
|
||||
double hapmap_q;
|
||||
String str = call.getBestVrsRef().first.getBases();
|
||||
String str = call.getBases();
|
||||
char alt = str.charAt(0);
|
||||
if (str.charAt(0) == call.getReferencebase()) alt = str.charAt(1);
|
||||
for (char c : hapmap_genotype.toCharArray()) {
|
||||
|
|
@ -121,8 +123,8 @@ public class AlleleMetrics {
|
|||
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
|
||||
|
||||
//System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString());
|
||||
if (call.getBestVrsNext().second.getScore() >= LOD_cutoff) {
|
||||
|
||||
if (call.getBestNext() >= LOD_cutoff) {
|
||||
// TODO : should this all be commented out?
|
||||
/*
|
||||
System.out.printf("DBG %f %f %f %f\n",
|
||||
hapmap_q,
|
||||
|
|
@ -143,7 +145,7 @@ public class AlleleMetrics {
|
|||
}*/
|
||||
}
|
||||
|
||||
if (result.second.getScore() >= LOD_cutoff || -1 * result.second.getScore() >= LOD_cutoff) {
|
||||
if (result >= LOD_cutoff || -1 * result >= LOD_cutoff) {
|
||||
|
||||
// Now calculate ref / var performance - did we correctly classify the site as
|
||||
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
|
||||
|
|
|
|||
|
|
@ -4,10 +4,13 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class GenotypeLikelihoods implements GenotypeGenerator {
|
||||
// precalculate these for performance (pow/log10 is expensive!)
|
||||
|
|
@ -410,7 +413,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||
*/
|
||||
@Override
|
||||
public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||
public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
||||
|
||||
|
||||
|
|
@ -437,14 +440,15 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
|
||||
// lets setup the locus
|
||||
GenotypeLocus locus = new GenotypeLocusImpl(pileup.getLocation(), pileup.getReads().size(),Math.sqrt(squared/pileup.getReads().size()));
|
||||
List<Genotype> lst = new ArrayList<Genotype>();
|
||||
for (int x = 0; x < this.likelihoods.length; x++) {
|
||||
try {
|
||||
locus.addGenotype(new Genotype(this.genotypes[x],lklihoods[x],this.likelihoods[x]));
|
||||
} catch (InvalidGenotypeException e) {
|
||||
throw new StingException("Invalid Genotype value",e);
|
||||
}
|
||||
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
|
||||
}
|
||||
return locus;
|
||||
return new SSGGenotypeCall(ref,2,pileup.getLocation(),lst,likelihoods,pileup);
|
||||
}
|
||||
//TODO: add this to the above code
|
||||
boolean discovery = false;
|
||||
public void setDiscovery(boolean isInDiscoveryMode) {
|
||||
discovery = isInDiscoveryMode;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,158 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class BasicGenotype
|
||||
*
|
||||
* A basic implementation of the genotype interface
|
||||
*/
|
||||
public class BasicGenotype implements Genotype {
|
||||
|
||||
// the bases that represent this genotype
|
||||
private String mBases = "";
|
||||
|
||||
// the ploidy, assume 2 unless told otherwise
|
||||
private int mPloidy = 2;
|
||||
|
||||
// the confidence score
|
||||
protected ConfidenceScore mConfidenceScore;
|
||||
|
||||
// our location
|
||||
private GenomeLoc mLoc;
|
||||
|
||||
/**
|
||||
* construct a genotypeLikelihood, given the bases, the confidence score, and the ploidy
|
||||
*
|
||||
* @param loc the location of the genotype
|
||||
* @param bases the bases that make up this genotype
|
||||
* @param ploidy the ploidy of this genotype
|
||||
* @param score the confidence score
|
||||
*/
|
||||
public BasicGenotype(GenomeLoc loc, String bases, int ploidy, ConfidenceScore score) {
|
||||
this.mPloidy = ploidy;
|
||||
if (bases.length() != ploidy) {
|
||||
throw new IllegalArgumentException("The number of bases should match the ploidy");
|
||||
}
|
||||
this.mBases = bases;
|
||||
this.mConfidenceScore = score;
|
||||
this.mLoc = loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* construct a genotypeLikelihood, given the bases and the confidence score, and assume the
|
||||
* ploidy is 2.
|
||||
*
|
||||
* @param loc the location of the genotype
|
||||
* @param bases the bases that make up this genotype
|
||||
* @param score the confidence score
|
||||
*/
|
||||
public BasicGenotype(GenomeLoc loc, String bases, ConfidenceScore score) {
|
||||
if (bases.length() != mPloidy) {
|
||||
throw new IllegalArgumentException("The number of bases should match the ploidy");
|
||||
}
|
||||
this.mBases = bases;
|
||||
this.mConfidenceScore = score;
|
||||
this.mLoc = loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the confidence score
|
||||
* @return get the confidence score that we're based on
|
||||
*/
|
||||
public ConfidenceScore getConfidenceScore() {
|
||||
return this.mConfidenceScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return mPloidy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
public boolean isHom() {
|
||||
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
|
||||
char first = mBases.charAt(0);
|
||||
for (char c : mBases.toCharArray()) {
|
||||
if (c != first) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
public boolean isHet() {
|
||||
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
|
||||
char first = mBases.charAt(0);
|
||||
for (char c : mBases.toCharArray()) {
|
||||
if (c != first) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotype's location
|
||||
* @return a GenomeLoc representing the location
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return mLoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
@Override
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariant(char ref) {
|
||||
String ret = Utils.dupString(ref,this.getPloidy());
|
||||
return !this.getBases().equals(ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Variant toVariant() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,113 +1,71 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeLikelihood
|
||||
* <p/>
|
||||
* This class encompasses all the information that is associated with a genotype
|
||||
* and it's likelihood, mainly:
|
||||
* <p/>
|
||||
* Likelihood value
|
||||
* This class emcompasses all the basic information about a genotype
|
||||
*/
|
||||
public class Genotype {
|
||||
private double mLikelihood = 0.0;
|
||||
private double mPosteriorProb = 0.0;
|
||||
private String mBases = "";
|
||||
private int mPloidy = 2; // assume diploid
|
||||
|
||||
public interface Genotype {
|
||||
/**
|
||||
* construct a genotypeLikelihood, given the bases, the posterior, and the likelihood
|
||||
*
|
||||
* @param bases the bases that make up this genotype
|
||||
* @param posterior the posterior probability of this genotype
|
||||
* @param likelihood the likelihood of this genotype
|
||||
* @param ploidy the ploidy of this genotype
|
||||
* get the confidence score
|
||||
* @return get the confidence score that we're based on
|
||||
*/
|
||||
public Genotype(String bases, double posterior, double likelihood, int ploidy) {
|
||||
this.mPloidy = ploidy;
|
||||
if (bases.length() != ploidy) {
|
||||
throw new IllegalArgumentException("The number of bases should match the ploidy");
|
||||
}
|
||||
this.mLikelihood = likelihood;
|
||||
this.mBases = bases;
|
||||
this.mPosteriorProb = posterior;
|
||||
}
|
||||
|
||||
/**
|
||||
* construct a genotypeLikelihood, given the bases, the posterior, and the likelihood
|
||||
*
|
||||
* @param bases the bases that make up this genotype
|
||||
* @param posterior the posterior probability of this genotype
|
||||
* @param likelihood the likelihood of this genotype
|
||||
*/
|
||||
public Genotype(String bases, double posterior, double likelihood) {
|
||||
if (bases.length() != mPloidy) {
|
||||
throw new IllegalArgumentException("The number of bases should match the ploidy");
|
||||
}
|
||||
this.mLikelihood = likelihood;
|
||||
this.mBases = bases;
|
||||
this.mPosteriorProb = posterior;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihood value
|
||||
*
|
||||
* @return a double, representing the likelihood
|
||||
*/
|
||||
public double getLikelihood() {
|
||||
return mLikelihood;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the posterior value
|
||||
*
|
||||
* @return a double, representing the posterior
|
||||
*/
|
||||
public double getPosteriorProb() {
|
||||
return mPosteriorProb;
|
||||
}
|
||||
public ConfidenceScore getConfidenceScore();
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() {
|
||||
return mBases;
|
||||
}
|
||||
public String getBases();
|
||||
|
||||
public int getPloidy() {
|
||||
return mPloidy;
|
||||
}
|
||||
/**
|
||||
* get the ploidy
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy();
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
public boolean isHom() {
|
||||
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
|
||||
char first = mBases.charAt(0);
|
||||
for (char c: mBases.toCharArray()) {
|
||||
if (c != first) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public boolean isHom();
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
public boolean isHet();
|
||||
|
||||
/**
|
||||
* get the genotype's location
|
||||
* @return a GenomeLoc representing the location
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
* @return true is a SNP
|
||||
*/
|
||||
public boolean isPointGenotype();
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
* @param ref the reference base or bases
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant(char ref);
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
* @return
|
||||
*/
|
||||
public boolean isHet() {
|
||||
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
|
||||
char first = mBases.charAt(0);
|
||||
for (char c: mBases.toCharArray()) {
|
||||
if (c != first) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public Variant toVariant();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -10,14 +13,7 @@ import org.broadinstitute.sting.utils.Pair;
|
|||
* Genotype call interface, for indicating that a genotype is
|
||||
* also a genotype call.
|
||||
*/
|
||||
public interface GenotypeCall extends GenotypeLocus{
|
||||
/**
|
||||
* get the confidence
|
||||
*
|
||||
* @return a ConfidenceScore representing the LOD score that this genotype was called with
|
||||
*/
|
||||
public ConfidenceScore getConfidence();
|
||||
|
||||
public interface GenotypeCall extends Genotype {
|
||||
/**
|
||||
* gets the reference base
|
||||
*
|
||||
|
|
@ -26,27 +22,48 @@ public interface GenotypeCall extends GenotypeLocus{
|
|||
public char getReferencebase();
|
||||
|
||||
/**
|
||||
* get the best vrs the next best genotype LOD score
|
||||
* @return the genotype, and a LOD for best - next
|
||||
*/
|
||||
public Pair<Genotype,ConfidenceScore> getBestVrsNext();
|
||||
|
||||
/**
|
||||
* get the best vrs the reference allele.
|
||||
* @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked
|
||||
* with is variation
|
||||
*/
|
||||
public Pair<Genotype,ConfidenceScore> getBestVrsRef();
|
||||
|
||||
/**
|
||||
* check to see if this call is a variant, i.e. not homozygous reference
|
||||
* check to see if this called genotype is a variant, i.e. not homozygous reference
|
||||
* @return true if it's not hom ref, false otherwise
|
||||
*/
|
||||
public boolean isVariation();
|
||||
|
||||
/**
|
||||
* return genotype locus, with our data
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenotypeLocus toGenotypeLocus();
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their ConfidenceScores (the best
|
||||
* to the worst ConfidenceScores)
|
||||
*
|
||||
* @return a list of the genotypes, sorted by ConfidenceScores
|
||||
*/
|
||||
public List<Genotype> getGenotypes();
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes();
|
||||
|
||||
}
|
||||
|
||||
class LexigraphicalComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
return genotype.getBases().compareTo(genotype1.getBases());
|
||||
}
|
||||
}
|
||||
|
||||
class ConfidenceScoreSort implements Comparator<Genotype> {
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
return genotype.getConfidenceScore().compareTo(genotype1.getConfidenceScore());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,225 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeCallImpl
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class GenotypeCallImpl implements GenotypeCall {
|
||||
|
||||
// our stored genotype locus
|
||||
private final GenotypeLocus mLocus;
|
||||
private final char mRefBase;
|
||||
private final ConfidenceScore mScore;
|
||||
|
||||
/**
|
||||
* generate a GenotypeCall object with the specified locus info and reference base
|
||||
*
|
||||
* @param mLocus the locus
|
||||
* @param mRefBase the reference base to use
|
||||
*/
|
||||
public GenotypeCallImpl(GenotypeLocus mLocus, char mRefBase, ConfidenceScore mScore) {
|
||||
if (mLocus.getGenotypes().size() < 1) throw new StingException("Genotype Locus is empty");
|
||||
this.mLocus = mLocus;
|
||||
this.mRefBase = String.valueOf(mRefBase).toUpperCase().charAt(0);
|
||||
this.mScore = mScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the confidence
|
||||
*
|
||||
* @return a ConfidenceScore representing the LOD score that this genotype was called with
|
||||
*/
|
||||
@Override
|
||||
public ConfidenceScore getConfidence() {
|
||||
return mScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base
|
||||
*
|
||||
* @return the reference base we represent
|
||||
*/
|
||||
@Override
|
||||
public char getReferencebase() {
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the best vrs the next best genotype LOD score
|
||||
*
|
||||
* @return the genotype, and a LOD for best - next
|
||||
*/
|
||||
@Override
|
||||
public Pair<Genotype, ConfidenceScore> getBestVrsNext() {
|
||||
List<Genotype> genos = this.mLocus.getGenotypes();
|
||||
if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes");
|
||||
return new Pair<Genotype, ConfidenceScore>(genos.get(0),
|
||||
new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - genos.get(1).getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT));
|
||||
}
|
||||
|
||||
/**
|
||||
* get the best vrs the reference allele.
|
||||
*
|
||||
* @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked
|
||||
* with is variation
|
||||
*/
|
||||
@Override
|
||||
public Pair<Genotype, ConfidenceScore> getBestVrsRef() {
|
||||
List<Genotype> genos = this.mLocus.getGenotypes();
|
||||
|
||||
// find the reference allele
|
||||
String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase();
|
||||
Genotype refGenotype = findRefGenotype(ref, genos);
|
||||
if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes");
|
||||
return new Pair<Genotype, ConfidenceScore>(genos.get(0),
|
||||
new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - refGenotype.getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT));
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference genotype object
|
||||
*
|
||||
* @param ref the reference as a ploidy count homozygous string
|
||||
* @param genos the genotype list
|
||||
*
|
||||
* @return a genotype for the
|
||||
*/
|
||||
private static Genotype findRefGenotype(String ref, List<Genotype> genos) {
|
||||
Genotype refGenotype = null;
|
||||
for (Genotype g : genos) {
|
||||
if (g.getBases().equals(ref)) refGenotype = g;
|
||||
}
|
||||
if (refGenotype == null) {
|
||||
for (Genotype g : genos) {
|
||||
System.err.println(g.getBases());
|
||||
}
|
||||
throw new StingException("Unable to find the reference genotype + " + ref + " size of genotype list = " + genos.size());
|
||||
}
|
||||
return refGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* check to see if this call is a variant, i.e. not homozygous reference
|
||||
*
|
||||
* @return true if it's not hom ref, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariation() {
|
||||
List<Genotype> genos = this.mLocus.getGenotypes();
|
||||
String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase();
|
||||
return !(genos.get(0).getBases().equals(ref));
|
||||
}
|
||||
|
||||
/** return genotype locus, with our data */
|
||||
@Override
|
||||
public GenotypeLocus toGenotypeLocus() {
|
||||
return mLocus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocus.getLocation();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy at this locus
|
||||
*
|
||||
* @return an integer representing the genotype ploidy at this location
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return mLocus.getPloidy();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their likelihoods (the best
|
||||
* to the worst likelihoods)
|
||||
*
|
||||
* @return a list of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
return mLocus.getGenotypes();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes and their posteriors
|
||||
*
|
||||
* @return a list of the poseriors
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getPosteriors() {
|
||||
return mLocus.getPosteriors();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes() {
|
||||
return mLocus.getLexigraphicallySortedGenotypes();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the read depth at this position
|
||||
*
|
||||
* @return the read depth, -1 if it is unknown
|
||||
*/
|
||||
@Override
|
||||
public int getReadDepth() {
|
||||
return mLocus.getReadDepth();
|
||||
}
|
||||
|
||||
/**
|
||||
* add a genotype to the collection
|
||||
*
|
||||
* @param genotype
|
||||
*
|
||||
* @throws InvalidGenotypeException
|
||||
*/
|
||||
@Override
|
||||
public void addGenotype(Genotype genotype) throws InvalidGenotypeException {
|
||||
mLocus.addGenotype(genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the root mean square (RMS) of the mapping qualities
|
||||
*
|
||||
* @return the RMS, or a value < 0 if it's not available
|
||||
*/
|
||||
@Override
|
||||
public double getRMSMappingQuals() {
|
||||
return mLocus.getRMSMappingQuals();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a variant, given the reference, and a lod score
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param score the threshold to use to determine if it's a variant or not
|
||||
*
|
||||
* @return a variant object, or null if no genotypes meet the criteria
|
||||
*/
|
||||
@Override
|
||||
public Variant toGenotypeCall(char refBase, ConfidenceScore score) {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -22,5 +22,5 @@ public interface GenotypeGenerator {
|
|||
* @param pileup a pileup of the reads, containing the reads and their offsets
|
||||
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||
*/
|
||||
public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup);
|
||||
public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,132 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface Genotype
|
||||
* <p/>
|
||||
* This interface represents the collection of genotypes at a specific locus
|
||||
*/
|
||||
public interface GenotypeLocus {
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* get the ploidy at this locus
|
||||
*
|
||||
* @return an integer representing the genotype ploidy at this location
|
||||
*/
|
||||
public int getPloidy();
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their likelihoods (the best
|
||||
* to the worst likelihoods)
|
||||
*
|
||||
* @return a list of the genotypes, sorted by likelihoods
|
||||
*/
|
||||
public List<Genotype> getGenotypes();
|
||||
|
||||
/**
|
||||
* get the genotypes and their posteriors
|
||||
*
|
||||
* @return a list of the genotypes, sorted by poseriors
|
||||
*/
|
||||
public List<Genotype> getPosteriors();
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes();
|
||||
|
||||
|
||||
/**
|
||||
* get the read depth at this position
|
||||
*
|
||||
* @return the read depth, -1 if it is unknown
|
||||
*/
|
||||
public int getReadDepth();
|
||||
|
||||
/**
|
||||
* add a genotype to the collection
|
||||
*
|
||||
* @param genotype
|
||||
*
|
||||
* @throws InvalidGenotypeException
|
||||
*/
|
||||
public void addGenotype(Genotype genotype) throws InvalidGenotypeException;
|
||||
|
||||
/**
|
||||
* get the root mean square (RMS) of the mapping qualities
|
||||
*
|
||||
* @return the RMS, or a value < 0 if it's not available
|
||||
*/
|
||||
public double getRMSMappingQuals();
|
||||
|
||||
/**
|
||||
* create a variant, given the reference, and a lod score
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param score the threshold to use to determine if it's a variant or not
|
||||
*
|
||||
* @return a variant object, or null if no genotypes meet the criteria
|
||||
*/
|
||||
public Variant toGenotypeCall(char refBase, ConfidenceScore score);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the following are helper Comparator classes for the above sort orders, that may be useful
|
||||
* for anyone implementing the GenotypeLocus interface
|
||||
*/
|
||||
class PosteriorComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
double diff = genotype.getPosteriorProb() - genotype1.getPosteriorProb();
|
||||
if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getPosteriorProb())))
|
||||
return 0;
|
||||
else if (diff < 0)
|
||||
return 1;
|
||||
else
|
||||
return -1; // TODO: BACKWARD NOW
|
||||
}
|
||||
}
|
||||
|
||||
class LexigraphicalComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
return genotype.getBases().compareTo(genotype1.getBases());
|
||||
}
|
||||
}
|
||||
|
||||
class LikelihoodComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
double diff = genotype.getLikelihood() - genotype1.getLikelihood();
|
||||
if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getLikelihood())))
|
||||
return 0;
|
||||
else if (diff < 0)
|
||||
return 1; // TODO: BACKWARD NOW
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeBucket
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class GenotypeLocusImpl implements GenotypeLocus {
|
||||
|
||||
private final List<Genotype> mGenotypes = new ArrayList<Genotype>();
|
||||
private GenomeLoc mLocation = null;
|
||||
private int mReadDepth = -1;
|
||||
private double mRMSMappingQual = -1;
|
||||
|
||||
public GenotypeLocusImpl(GenomeLoc location, int readDepth, double rmsMappingQual) {
|
||||
this.mLocation = location;
|
||||
mReadDepth = readDepth;
|
||||
mRMSMappingQual = rmsMappingQual;
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy at this locus
|
||||
*
|
||||
* @return an integer representing the genotype ploidy at this location
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their likelihoods (the best
|
||||
* to the worst likelihoods)
|
||||
*
|
||||
* @return a list of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
Collections.sort(this.mGenotypes, new LikelihoodComparator());
|
||||
return this.mGenotypes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* get the genotypes and their posteriors
|
||||
*
|
||||
* @return a list of the poseriors
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getPosteriors() {
|
||||
Collections.sort(this.mGenotypes, new PosteriorComparator());
|
||||
return this.mGenotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes() {
|
||||
Collections.sort(this.mGenotypes, new LexigraphicalComparator());
|
||||
return this.mGenotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the read depth at this position
|
||||
*
|
||||
* @return the read depth, -1 if it is unknown
|
||||
*/
|
||||
@Override
|
||||
public int getReadDepth() {
|
||||
return mReadDepth;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* add a genotype to the collection
|
||||
*
|
||||
* @param genotype
|
||||
*
|
||||
* @throws InvalidGenotypeException
|
||||
*/
|
||||
@Override
|
||||
public void addGenotype(Genotype genotype) throws InvalidGenotypeException {
|
||||
this.mGenotypes.add(genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the root mean square (RMS) of the mapping qualities
|
||||
*
|
||||
* @return the RMS, or a value < 0 if it's not available
|
||||
*/
|
||||
@Override
|
||||
public double getRMSMappingQuals() {
|
||||
return mRMSMappingQual;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a variant, given the reference, and a lod score
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param score the threshold to use to determine if it's a variant or not
|
||||
*
|
||||
* @return a variant object, or null if no genotypes meet the criteria
|
||||
*/
|
||||
@Override
|
||||
public Variant toGenotypeCall(char refBase, ConfidenceScore score) {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeCallImpl
|
||||
* <p/>
|
||||
* The single sample genotypers implementation of the genotype call, plus
|
||||
* some extra booty for the genotype writers of this world
|
||||
*/
|
||||
public class SSGGenotypeCall implements GenotypeCall {
|
||||
// TODO: make SSG into a more robust Genotype call interface
|
||||
// our stored genotype locus
|
||||
private final char mRefBase;
|
||||
private final int mPloidy;
|
||||
private final GenomeLoc mLoc;
|
||||
private TreeMap<Double, Genotype> mGenotypes = new TreeMap();
|
||||
private double mLikelihoods[];
|
||||
private double bestNext = 0;
|
||||
private double bestRef = 0;
|
||||
|
||||
private int readDepth;
|
||||
private double rmsMapping;
|
||||
|
||||
public SSGGenotypeCall(char mRefBase, int mPloidy, GenomeLoc mLoc, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
|
||||
this.mRefBase = mRefBase;
|
||||
this.mPloidy = mPloidy;
|
||||
this.mLoc = mLoc;
|
||||
if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0");
|
||||
int index = 0;
|
||||
String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase();
|
||||
double ref = 0.0;
|
||||
double best = Double.NEGATIVE_INFINITY; // plus one
|
||||
double next = Double.NEGATIVE_INFINITY;
|
||||
for (Genotype g : genotypes) {
|
||||
if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index];
|
||||
if (likelihoods[index] > best) {
|
||||
next = best;
|
||||
best = likelihoods[index];
|
||||
} else if (likelihoods[index] > next) next = likelihoods[index];
|
||||
index++;
|
||||
}
|
||||
bestNext = Math.abs(best - next);
|
||||
bestRef = Math.abs(best - ref);
|
||||
mLikelihoods = likelihoods;
|
||||
index = 0;
|
||||
for (Genotype g : genotypes) {
|
||||
((BasicGenotype)g).mConfidenceScore = new BayesianConfidenceScore(Math.abs(likelihoods[index] - ref));
|
||||
mGenotypes.put(likelihoods[index],g);
|
||||
index++;
|
||||
}
|
||||
|
||||
this.readDepth = pileup.getReads().size();
|
||||
rmsMapping = Math.sqrt(calculateRMS(pileup) / readDepth);
|
||||
}
|
||||
|
||||
private double calculateRMS(ReadBackedPileup pileup) {
|
||||
double rms = 0.0;
|
||||
for (SAMRecord r : pileup.getReads()) {
|
||||
rms += r.getMappingQuality() * r.getMappingQuality();
|
||||
}
|
||||
return rms;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base
|
||||
*
|
||||
* @return the reference base we represent
|
||||
*/
|
||||
@Override
|
||||
public char getReferencebase() {
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* check to see if this called genotype is a variant, i.e. not homozygous reference
|
||||
*
|
||||
* @return true if it's not hom ref, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariation() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the confidence score
|
||||
*
|
||||
* @return get the confidence score that we're based on
|
||||
*/
|
||||
@Override
|
||||
public ConfidenceScore getConfidenceScore() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).getConfidenceScore();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public String getBases() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).getBases();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return this.mPloidy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isHom() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isHet() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isHet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).getLocation();
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
@Override
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariant(char ref) {
|
||||
return mGenotypes.get(mGenotypes.descendingKeySet().first()).isVariant(ref);
|
||||
}
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Variant toVariant() {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their ConfidenceScores (the best
|
||||
* to the worst ConfidenceScores)
|
||||
*
|
||||
* @return a list of the genotypes, sorted by ConfidenceScores
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> newList = new ArrayList<Genotype>();
|
||||
newList.addAll(this.mGenotypes.values());
|
||||
return newList;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes() {
|
||||
List<Genotype> newList = new ArrayList<Genotype>();
|
||||
newList.addAll(this.mGenotypes.values());
|
||||
Collections.sort(newList, new LexigraphicalComparator());
|
||||
return newList;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the likelihoods as a double array, in lexographic order
|
||||
*
|
||||
* @return the likelihoods
|
||||
*/
|
||||
public double[] getLikelihoods() {
|
||||
return this.mLikelihoods;
|
||||
}
|
||||
|
||||
public int getReadDepth() {
|
||||
return readDepth;
|
||||
}
|
||||
|
||||
public double getRmsMapping() {
|
||||
return rmsMapping;
|
||||
}
|
||||
|
||||
public double getBestNext() {
|
||||
return bestNext;
|
||||
}
|
||||
|
||||
public double getBestRef() {
|
||||
return bestRef;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
package org.broadinstitute.sting.utils.genotype.confidence;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class LikelihoodConfidenceScore
|
||||
*
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class BayesianConfidenceScore extends ConfidenceScore {
|
||||
public BayesianConfidenceScore(double score) {
|
||||
super(score);
|
||||
}
|
||||
|
||||
/**
|
||||
* return the confidence method we're employing, UNKNOWN is an option
|
||||
*
|
||||
* @return the method of confidence we represent
|
||||
*/
|
||||
@Override
|
||||
public SCORE_METHOD getConfidenceType() {
|
||||
return SCORE_METHOD.LOD_SCORE;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the confidence score, normalized to the range of [0-1]
|
||||
*
|
||||
* @return a confidence score
|
||||
*/
|
||||
@Override
|
||||
public double normalizedConfidenceScore() {
|
||||
return Math.pow(10,this.getScore());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
package org.broadinstitute.sting.utils.genotype.confidence;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -8,32 +7,19 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
* <p/>
|
||||
* this class represents the confidence in a genotype, and the method we used to obtain it
|
||||
*/
|
||||
public class ConfidenceScore implements Comparable<ConfidenceScore> {
|
||||
public abstract class ConfidenceScore implements Comparable<ConfidenceScore> {
|
||||
|
||||
// the general error of a floating point value
|
||||
private static final Double EPSILON = 1.0e-15;
|
||||
|
||||
public enum SCORE_METHOD {
|
||||
BEST_NEXT, BEST_REF, OTHER;
|
||||
LOD_SCORE, UNKNOWN;
|
||||
}
|
||||
|
||||
private Double mScore;
|
||||
private SCORE_METHOD mMethod;
|
||||
|
||||
public ConfidenceScore(double score, SCORE_METHOD method) {
|
||||
public ConfidenceScore(double score) {
|
||||
this.mScore = score;
|
||||
this.mMethod = method;
|
||||
}
|
||||
|
||||
/**
|
||||
* generate a confidence score, given the two likelihoods, and the method used
|
||||
*
|
||||
* @param likelihoodOne the first likelihood
|
||||
* @param likelihoodTwo the second likelihood
|
||||
* @param method the method used to determine the likelihood
|
||||
*/
|
||||
public ConfidenceScore(double likelihoodOne, double likelihoodTwo, SCORE_METHOD method) {
|
||||
this.mScore = likelihoodOne / likelihoodTwo;
|
||||
this.mMethod = method;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -43,7 +29,7 @@ public class ConfidenceScore implements Comparable<ConfidenceScore> {
|
|||
*/
|
||||
@Override
|
||||
public int compareTo(ConfidenceScore o) {
|
||||
if (o.mMethod != this.mMethod) {
|
||||
if (o.getConfidenceType() != this.getConfidenceType()) {
|
||||
throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods");
|
||||
}
|
||||
double diff = mScore - o.mScore;
|
||||
|
|
@ -55,11 +41,23 @@ public class ConfidenceScore implements Comparable<ConfidenceScore> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the score
|
||||
* @return a double representing the genotype score
|
||||
*/
|
||||
public Double getScore() {
|
||||
return mScore;
|
||||
}
|
||||
|
||||
public SCORE_METHOD getMethod() {
|
||||
return mMethod;
|
||||
}
|
||||
/**
|
||||
* return the confidence method we're employing, UNKNOWN is an option
|
||||
* @return the method of confidence we represent
|
||||
*/
|
||||
public abstract SCORE_METHOD getConfidenceType();
|
||||
|
||||
/**
|
||||
* get the confidence score, normalized to the range of [0-1]
|
||||
* @return a confidence score
|
||||
*/
|
||||
public abstract double normalizedConfidenceScore();
|
||||
}
|
||||
|
|
@ -1,14 +1,13 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.SSGGenotypeCall;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -42,37 +41,27 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
* @param locus the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)");
|
||||
|
||||
|
||||
// this is to perserve the format string that we used to use
|
||||
double[] likelihoods = new double[10];
|
||||
int index = 0;
|
||||
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
|
||||
for (Genotype G: lt) {
|
||||
likelihoods[index] = G.getLikelihood();
|
||||
index++;
|
||||
}
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
|
||||
mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
locus.getLocation().getContig(),
|
||||
locus.getLocation().getStart(),
|
||||
locus.getReferencebase(),
|
||||
locus.getReadDepth(),
|
||||
call.getReadDepth(),
|
||||
-1,
|
||||
locus.getGenotypes().get(0).getBases(),
|
||||
locus.getBestVrsRef().second.getScore(),
|
||||
locus.getBestVrsNext().second.getScore(),
|
||||
likelihoods[0],
|
||||
likelihoods[1],
|
||||
likelihoods[2],
|
||||
likelihoods[3],
|
||||
likelihoods[4],
|
||||
likelihoods[5],
|
||||
likelihoods[6],
|
||||
likelihoods[7],
|
||||
likelihoods[8],
|
||||
likelihoods[9]));
|
||||
locus.getBases(),
|
||||
call.getConfidenceScore().getScore(),
|
||||
locus.getConfidenceScore().getScore(),
|
||||
call.getLikelihoods()[0],
|
||||
call.getLikelihoods()[1],
|
||||
call.getLikelihoods()[2],
|
||||
call.getLikelihoods()[3],
|
||||
call.getLikelihoods()[4],
|
||||
call.getLikelihoods()[5],
|
||||
call.getLikelihoods()[6],
|
||||
call.getLikelihoods()[7],
|
||||
call.getLikelihoods()[8],
|
||||
call.getLikelihoods()[9]));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -106,18 +105,10 @@ public class GLFWriter implements GenotypeWriter {
|
|||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
//TODO: CODEWORD
|
||||
// this is to perserve the format string that we used to use
|
||||
double[] posteriors = new double[10];
|
||||
int index = 0;
|
||||
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
|
||||
for (Genotype G: lt) {
|
||||
posteriors[index] = G.getLikelihood();
|
||||
index++;
|
||||
}
|
||||
|
||||
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRMSMappingQuals(),locus.getReferencebase(),locus.getReadDepth(),obj);
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
// TODO: fix me aaron
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue