1. Print out allele frequency range (from joint estimation model only).

2. Don't print verbose output from SLOD calculation (it's just a repeat of previous output).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2032 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-13 03:59:13 +00:00
parent 72825c4848
commit 555fb975de
6 changed files with 92 additions and 8 deletions

View File

@ -38,6 +38,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected int POOL_SIZE;
protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY;
protected double ALLELE_FREQUENCY_RANGE;
protected int maxDeletionsInPileup;
protected String assumedSingleSample;
protected PrintWriter verboseWriter;
@ -70,6 +71,9 @@ public abstract class GenotypeCalculationModel implements Cloneable {
POOL_SIZE = UAC.POOLSIZE;
CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
ALLELE_FREQUENCY_RANGE = UAC.ALLELE_FREQUENCY_RANGE;
if ( ALLELE_FREQUENCY_RANGE < 0.0 || ALLELE_FREQUENCY_RANGE > 1.0 )
throw new StingException("Allele frequency fraction must be a value between 0.0 and 1.0");
maxDeletionsInPileup = UAC.MAX_DELETIONS;
assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE;
if ( UAC.VERBOSE != null ) {

View File

@ -212,7 +212,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
return HWvalues;
}
private void normalizeFromLog10(double[] array) {
private static void normalizeFromLog10(double[] array) {
// for precision purposes, we need to add (or really subtract, since they're
// all negative) the largest value; also, we need to convert to normal-space.
double maxValue = findMaxEntry(array).first;
@ -298,7 +298,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD )
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
ArrayList<Genotype> calls = new ArrayList<Genotype>();
// first, populate the sample-specific data
@ -332,7 +331,10 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess);
int bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second;
((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
AlleleFrequencyBacked.AlleleFrequencyRange range = computeAFrange(alleleFrequencyPosteriors[indexOfMax], frequencyEstimationPoints-1, bestAFguess, ALLELE_FREQUENCY_RANGE);
((AlleleFrequencyBacked)locusdata).setAlleleFrequencyRange(range);
}
if ( locusdata instanceof IDBacked ) {
rodDbSNP dbsnp = getDbSNP(tracker);
@ -404,13 +406,13 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
// the forward lod
initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.FORWARD);
calculateAlleleFrequencyPosteriors(ref, loc);
calculateAlleleFrequencyPosteriors(ref, null);
double forwardLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double forwardLog10PofF = Math.log10(PofFs[indexOfMax]);
// the reverse lod
initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.REVERSE);
calculateAlleleFrequencyPosteriors(ref, loc);
calculateAlleleFrequencyPosteriors(ref, null);
double reverseLog10PofNull = Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
double reverseLog10PofF = Math.log10(PofFs[indexOfMax]);
@ -430,4 +432,23 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
return new Pair<List<Genotype>, GenotypeLocusData>(calls, locusdata);
}
// computes the range of allele frequencies making up the given fraction of the total probability
private static AlleleFrequencyBacked.AlleleFrequencyRange computeAFrange(double[] alleleFrequencyProbs, int N, int bestAFguess, double fraction) {
double totalProb = alleleFrequencyProbs[bestAFguess];
int lowIndex = bestAFguess;
int highIndex = bestAFguess;
while ( totalProb < fraction ) {
if ( lowIndex > 1 ) {
lowIndex--;
totalProb += alleleFrequencyProbs[lowIndex];
}
if ( highIndex < N ) {
highIndex++;
totalProb += alleleFrequencyProbs[highIndex];
}
}
return new AlleleFrequencyBacked.AlleleFrequencyRange((double)lowIndex / (double)N, (double)highIndex / (double)N, fraction);
}
}

View File

@ -83,6 +83,9 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "max_coverage", shortName = "coverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
public Integer MAX_READS_IN_PILEUP = 10000;
@Argument(fullName = "min_allele_frequency", shortName = "minFreq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
@Argument(fullName = "allele_frequency_range", shortName = "freq_range", doc = "The range/fraction to emit of the total probability over all frequencies (in JOINT_ESTIMATION model only)", required = false)
public double ALLELE_FREQUENCY_RANGE = 0.95;
}

View File

@ -11,7 +11,7 @@ public interface AlleleFrequencyBacked {
/**
*
* @return returns the allele frequency for this genotype
* @return returns the best allele frequency for this genotype
*/
public double getAlleleFrequency();
@ -21,4 +21,39 @@ public interface AlleleFrequencyBacked {
*/
public void setAlleleFrequency(double frequency);
/**
*
* @return returns the allele frequency for this genotype
*/
public AlleleFrequencyRange getAlleleFrequencyRange();
/**
*
* @param range the allele frequency range for this genotype
*/
public void setAlleleFrequencyRange(AlleleFrequencyRange range);
/**
* A class representing a range of allele frequencies that make up a given
* fraction of the total probability over all frequencies.
*/
public class AlleleFrequencyRange {
private double mLow, mHigh, mFraction;
public AlleleFrequencyRange(double low, double high, double fraction) {
mLow = low;
mHigh = high;
mFraction = fraction;
}
public double getLowEnd() { return mLow; }
public double getHighEnd() { return mHigh; }
public double getPercentageOfProbability() { return mFraction; }
public String toString() {
return String.format("%.2f-%.2f,%.0f%%", mLow, mHigh, 100*mFraction);
}
}
}

View File

@ -21,8 +21,9 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
// the strand score lod
private double mSLOD = 0.0;
// the allele frequency
// the allele frequency fields
private double mAlleleFrequency = 0.0;
private AlleleFrequencyRange mAFrange = null;
// the location
private GenomeLoc mLoc;
@ -115,6 +116,23 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
mAlleleFrequency = frequency;
}
/**
* get the allele frequency range
*
* @return the allele frequency range
*/
public AlleleFrequencyRange getAlleleFrequencyRange() {
return mAFrange;
}
/**
*
* @param range the allele frequency range for this genotype
*/
public void setAlleleFrequencyRange(AlleleFrequencyRange range) {
mAFrange = range;
}
/**
* @return returns the dbsnp id for this genotype
*/

View File

@ -189,6 +189,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
if ( locusdata != null ) {
infoFields.put("SB", String.format("%.2f", locusdata.getSLOD()));
infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency()));
AlleleFrequencyBacked.AlleleFrequencyRange range = locusdata.getAlleleFrequencyRange();
if ( range != null )
infoFields.put("AFrange", range.toString());
Map<String, String> otherFields = locusdata.getFields();
if ( otherFields != null ) {
infoFields.putAll(otherFields);