small code cleanup, a couple of little changes to SSGGenotypeCall
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1343 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fbc7d44bc7
commit
0087234ed7
|
|
@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.playground.utils.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
|
|
@ -167,7 +166,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
|||
int offset = offsets.get(i);
|
||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||
}
|
||||
G.ApplyPrior(ref, allele_likelihoods);
|
||||
G.applyPrior(ref, allele_likelihoods);
|
||||
|
||||
// Handle indels
|
||||
if (CALL_INDELS)
|
||||
|
|
|
|||
|
|
@ -14,12 +14,14 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class SingleSampleGenotyper extends LocusWalker<org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall, GenotypeWriter> {
|
||||
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
|
||||
// Control output settings
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
|
||||
|
|
@ -131,14 +133,14 @@ public class SingleSampleGenotyper extends LocusWalker<org.broadinstitute.sting.
|
|||
*
|
||||
* @return an AlleleFrequencyEstimate object
|
||||
*/
|
||||
public org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
rationalizeSampleName(context.getReads().get(0));
|
||||
if (context.getLocation().getStart() == 73) {
|
||||
int stop = 1;
|
||||
}
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
GenotypeLikelihoods G = callGenotype(tracker);
|
||||
org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall geno = (org.broadinstitute.sting.utils.genotype.calls.SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
if (geno != null) {
|
||||
metricsOut.nextPosition(geno, tracker);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,7 +80,15 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
private double priorHomVar;
|
||||
private double[] oneHalfMinusData;
|
||||
private boolean threeBaseErrors = false;
|
||||
private boolean discoveryMode = false;
|
||||
|
||||
/**
|
||||
* set the mode to discovery
|
||||
* @param isInDiscoveryMode
|
||||
*/
|
||||
public void setDiscovery(boolean isInDiscoveryMode) {
|
||||
discoveryMode = isInDiscoveryMode;
|
||||
}
|
||||
// Store the 2nd-best base priors for on-genotype primary bases
|
||||
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
|
|
@ -287,7 +295,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
return s;
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, double[] allele_likelihoods) {
|
||||
public void applyPrior(char ref, double[] allele_likelihoods) {
|
||||
int k = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = i; j < 4; j++) {
|
||||
|
|
@ -302,7 +310,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
this.sort();
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref) {
|
||||
public void applyPrior(char ref) {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
|
|
@ -420,8 +428,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
*/
|
||||
@Override
|
||||
public GenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
||||
|
||||
filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
||||
|
||||
// for calculating the rms of the mapping qualities
|
||||
double squared = 0.0;
|
||||
|
|
@ -436,13 +443,8 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
// save off the likelihoods
|
||||
if (likelihoods == null || likelihoods.length == 0) return null;
|
||||
|
||||
double lklihoods[] = new double[likelihoods.length];
|
||||
|
||||
System.arraycopy(likelihoods, 0, lklihoods, 0, likelihoods.length);
|
||||
|
||||
|
||||
ApplyPrior(ref);
|
||||
|
||||
// Apply the two calculations
|
||||
applyPrior(ref);
|
||||
applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
|
||||
// lets setup the locus
|
||||
|
|
@ -450,11 +452,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
for (int x = 0; x < this.likelihoods.length; x++) {
|
||||
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
|
||||
}
|
||||
return new SSGGenotypeCall(ref,2,pileup.getLocation(),lst,likelihoods,pileup);
|
||||
}
|
||||
//TODO: add this to the above code
|
||||
boolean discovery = false;
|
||||
public void setDiscovery(boolean isInDiscoveryMode) {
|
||||
discovery = isInDiscoveryMode;
|
||||
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,32 +22,55 @@ import java.util.TreeMap;
|
|||
* <p/>
|
||||
* Class GenotypeCallImpl
|
||||
* <p/>
|
||||
* The single sample genotypers implementation of the genotype call, plus
|
||||
* some extra booty for the genotype writers of this world
|
||||
* The single sample genotypers implementation of the genotype call, which contains
|
||||
* extra information for the various genotype outputs
|
||||
*/
|
||||
public class SSGGenotypeCall implements GenotypeCall {
|
||||
// TODO: make SSG into a more robust Genotype call interface
|
||||
// our stored genotype locus
|
||||
private final String mRefBase;
|
||||
private final int mPloidy;
|
||||
private final GenomeLoc mLoc;
|
||||
private TreeMap<Double, Genotype> mGenotypes = new TreeMap();
|
||||
private double mLikelihoods[];
|
||||
private double bestNext = 0;
|
||||
private double bestRef = 0;
|
||||
|
||||
private int readDepth;
|
||||
private double rmsMapping;
|
||||
private int readDepth = 0;
|
||||
private double rmsMapping = 0;
|
||||
|
||||
public SSGGenotypeCall(char mRefBase, int mPloidy, GenomeLoc mLoc, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
|
||||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
* @param discoveryMode are we in discovery mode or genotyping mode
|
||||
* @param mRefBase the ref base
|
||||
* @param mPloidy the ploidy
|
||||
* @param genotypes the genotype
|
||||
* @param likelihoods the likelihood
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
*/
|
||||
public SSGGenotypeCall(boolean discoveryMode, char mRefBase, int mPloidy, List<Genotype> genotypes, double likelihoods[], ReadBackedPileup pileup) {
|
||||
this.mRefBase = String.valueOf(mRefBase).toUpperCase();
|
||||
this.mPloidy = mPloidy;
|
||||
this.mLoc = mLoc;
|
||||
if (genotypes.size() < 1) throw new IllegalArgumentException("Genotypes list size must be greater than 0");
|
||||
int index = 0;
|
||||
String refStr = Utils.dupString(mRefBase, mPloidy).toUpperCase();
|
||||
calculateBestNext(discoveryMode, genotypes, likelihoods, refStr);
|
||||
|
||||
this.readDepth = pileup.getReads().size();
|
||||
rmsMapping = Math.sqrt(calculateRMS(pileup) / readDepth);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* calculate the best and next best, and update the stored confidence scores based on these values
|
||||
*
|
||||
* @param discoveryMode are we in discovery mode?
|
||||
* @param genotypes the genotypes
|
||||
* @param likelihoods the likelihoods corresponding to the genotypes
|
||||
* @param refStr the reference string, i.e. reb base[ploidy]
|
||||
*/
|
||||
private void calculateBestNext(boolean discoveryMode, List<Genotype> genotypes, double[] likelihoods, String refStr) {
|
||||
int index = 0;
|
||||
double ref = 0.0;
|
||||
double best = Double.NEGATIVE_INFINITY; // plus one
|
||||
double best = Double.NEGATIVE_INFINITY;
|
||||
double next = Double.NEGATIVE_INFINITY;
|
||||
for (Genotype g : genotypes) {
|
||||
if (g.getBases().toUpperCase().equals(refStr)) ref = likelihoods[index];
|
||||
|
|
@ -61,16 +84,23 @@ public class SSGGenotypeCall implements GenotypeCall {
|
|||
bestRef = Math.abs(best - ref);
|
||||
mLikelihoods = likelihoods;
|
||||
index = 0;
|
||||
for (Genotype g : genotypes) {
|
||||
((BasicGenotype)g).setConfidenceScore( new BayesianConfidenceScore(Math.abs(likelihoods[index] - ref)));
|
||||
mGenotypes.put(likelihoods[index],g);
|
||||
index++;
|
||||
}
|
||||
|
||||
this.readDepth = pileup.getReads().size();
|
||||
rmsMapping = Math.sqrt(calculateRMS(pileup) / readDepth);
|
||||
// reset the confidence based on either the discovery mode or the genotype mode
|
||||
for (Genotype g : genotypes) {
|
||||
double val = (discoveryMode) ? Math.abs(likelihoods[index] - best) : Math.abs(likelihoods[index] - ref);
|
||||
((BasicGenotype) g).setConfidenceScore(new BayesianConfidenceScore(val));
|
||||
mGenotypes.put(likelihoods[index], g);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate the rms , given the read pileup
|
||||
*
|
||||
* @param pileup
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private double calculateRMS(ReadBackedPileup pileup) {
|
||||
double rms = 0.0;
|
||||
for (SAMRecord r : pileup.getReads()) {
|
||||
|
|
@ -174,6 +204,7 @@ public class SSGGenotypeCall implements GenotypeCall {
|
|||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
SSGGenotypeCall call = (SSGGenotypeCall)locus;
|
||||
LikelihoodObject obj = new LikelihoodObject(call.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
// TODO: fix me aaron
|
||||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG); // transform! ... to negitive log likelihoods
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)0.0,locus.getReferencebase(),0,obj);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue