small code cleanup, a couple of little changes to SSGGenotypeCall

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1343 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-30 19:47:37 +00:00
parent fbc7d44bc7
commit 0087234ed7
5 changed files with 71 additions and 41 deletions

View File

@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.playground.utils.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.ReadBackedPileup;
@ -167,7 +166,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
int offset = offsets.get(i);
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
}
G.ApplyPrior(ref, allele_likelihoods);
G.applyPrior(ref, allele_likelihoods);
// Handle indels
if (CALL_INDELS)

View File

@ -14,12 +14,14 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
import java.io.File;
@ReadFilters(ZeroMappingQualityReadFilter.class)
public class SingleSampleGenotyper extends LocusWalker<org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall, GenotypeWriter> {
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
@ -131,14 +133,14 @@ public class SingleSampleGenotyper extends LocusWalker<org.broadinstitute.sting.
*
* @return an AlleleFrequencyEstimate object
*/
public org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
rationalizeSampleName(context.getReads().get(0));
if (context.getLocation().getStart() == 73) {
int stop = 1;
}
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
GenotypeLikelihoods G = callGenotype(tracker);
org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall geno = (org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
if (geno != null) {
metricsOut.nextPosition(geno, tracker);
}

View File

@ -80,7 +80,15 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
private double priorHomVar;
private double[] oneHalfMinusData;
private boolean threeBaseErrors = false;
private boolean discoveryMode = false;
/**
* set the mode to discovery
* @param isInDiscoveryMode
*/
public void setDiscovery(boolean isInDiscoveryMode) {
discoveryMode = isInDiscoveryMode;
}
// Store the 2nd-best base priors for on-genotype primary bases
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
@ -287,7 +295,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
return s;
}
public void ApplyPrior(char ref, double[] allele_likelihoods) {
public void applyPrior(char ref, double[] allele_likelihoods) {
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
@ -302,7 +310,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
this.sort();
}
public void ApplyPrior(char ref) {
public void applyPrior(char ref) {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
@ -420,8 +428,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
*/
@Override
public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
double squared = 0.0;
@ -436,13 +443,8 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
// save off the likelihoods
if (likelihoods == null || likelihoods.length == 0) return null;
double lklihoods[] = new double[likelihoods.length];
System.arraycopy(likelihoods, 0, lklihoods, 0, likelihoods.length);
ApplyPrior(ref);
// Apply the two calculations
applyPrior(ref);
applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
// lets setup the locus
@ -450,11 +452,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
for (int x = 0; x < this.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
}
return new SSGGenotypeCall(ref,2,pileup.getLocation(),lst,likelihoods,pileup);
}
//TODO: add this to the above code
boolean discovery = false;
public void setDiscovery(boolean isInDiscoveryMode) {
discovery = isInDiscoveryMode;
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
}
}

View File

@ -22,32 +22,55 @@ import java.util.TreeMap;
* <p/>
* Class GenotypeCallImpl
* <p/>
* The single sample genotypers implementation of the genotype call, plus
* some extra booty for the genotype writers of this world
* The single sample genotypers implementation of the genotype call, which contains
* extra information for the various genotype outputs
*/
public class SSGGenotypeCall implements GenotypeCall {
// TODO: make SSG into a more robust Genotype call interface
// our stored genotype locus
private final String mRefBase;
private final int mPloidy;
private final GenomeLoc mLoc;
private TreeMap<Double, Genotype> mGenotypes = new TreeMap();
private double mLikelihoods[];
private double bestNext = 0;
private double bestRef = 0;
private int readDepth;
private double rmsMapping;
private int readDepth = 0;
private double rmsMapping = 0;
public SSGGenotypeCall(char mRefBase, int mPloidy, GenomeLoc mLoc, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param discoveryMode are we in discovery mode or genotyping mode
* @param mRefBase the ref base
* @param mPloidy the ploidy
* @param genotypes the genotype
* @param likelihoods the likelihood
* @param pileup the pile-up of reads at the specified locus
*/
public SSGGenotypeCall(boolean discoveryMode, char mRefBase, int mPloidy, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
this.mRefBase = String.valueOf(mRefBase).toUpperCase();
this.mPloidy = mPloidy;
this.mLoc = mLoc;
if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0");
int index = 0;
String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase();
calculateBestNext(discoveryMode, genotypes, likelihoods, refStr);
this.readDepth = pileup.getReads().size();
rmsMapping = Math.sqrt(calculateRMS(pileup) / readDepth);
}
/**
* calculate the best and next best, and update the stored confidence scores based on these values
*
* @param discoveryMode are we in discovery mode?
* @param genotypes the genotypes
* @param likelihoods the likelihoods corresponding to the genotypes
* @param refStr the reference string, i.e. reb base[ploidy]
*/
private void calculateBestNext(boolean discoveryMode, List<Genotype> genotypes, double[] likelihoods, String refStr) {
int index = 0;
double ref = 0.0;
double best = Double.NEGATIVE_INFINITY; // plus one
double best = Double.NEGATIVE_INFINITY;
double next = Double.NEGATIVE_INFINITY;
for (Genotype g : genotypes) {
if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index];
@ -61,16 +84,23 @@ public class SSGGenotypeCall implements GenotypeCall {
bestRef = Math.abs(best - ref);
mLikelihoods = likelihoods;
index = 0;
for (Genotype g : genotypes) {
((BasicGenotype)g).setConfidenceScore( new BayesianConfidenceScore(Math.abs(likelihoods[index] - ref)));
mGenotypes.put(likelihoods[index],g);
index++;
}
this.readDepth = pileup.getReads().size();
rmsMapping = Math.sqrt(calculateRMS(pileup) / readDepth);
// reset the confidence based on either the discovery mode or the genotype mode
for (Genotype g : genotypes) {
double val = (discoveryMode) ? Math.abs(likelihoods[index] - best) : Math.abs(likelihoods[index] - ref);
((BasicGenotype) g).setConfidenceScore(new BayesianConfidenceScore(val));
mGenotypes.put(likelihoods[index], g);
index++;
}
}
/**
* calculate the rms , given the read pileup
*
* @param pileup
*
* @return
*/
private double calculateRMS(ReadBackedPileup pileup) {
double rms = 0.0;
for (SAMRecord r : pileup.getReads()) {
@ -174,6 +204,7 @@ public class SSGGenotypeCall implements GenotypeCall {
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override

View File

@ -111,7 +111,7 @@ public class GLFWriter implements GenotypeWriter {
public void addGenotypeCall(GenotypeCall locus) {
SSGGenotypeCall call = (SSGGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
// TODO: fix me aaron
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj);
}