Two ugly hopefully temporary fixes for new genotyping model:

a) In Indel genotyper: we can't deal yet with extended events correctly and we are still triggering at each extended event which results in repeated records on a vcf. So, to avoid this, keep track of start position of candidate variantes we've visited and if we've visited a variant before we don't do it again.
b) Avoid infinite terms in QUAL and in genotype likelihoods which can happen if posterior AF happens to be exactly zero. For now, hard-code a minimum value of each term of the posterior AF likelihood to be -300 (ie 1e-300 in lin space). This can be solved with better and smarter log-to-lin conversions and some precision fixes in AF calculation.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4455 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-10-08 00:53:16 +00:00
parent 0a2e76e9dc
commit 3838823262
2 changed files with 39 additions and 6 deletions

View File

@ -55,11 +55,13 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
boolean DEBUGOUT = false;
private HaplotypeIndelErrorModel model;
private ArrayList<Integer> sitesVisited;
protected DindelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
model = new HaplotypeIndelErrorModel(maxReadDeletionLength, insertionStartProbability,
insertionEndProbability, alphaDeletionProbability, HAPLOTYPE_SIZE, false, DEBUGOUT);
sitesVisited = new ArrayList<Integer>();
}
public Allele getLikelihoods(RefMetaDataTracker tracker,
@ -82,9 +84,15 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
if (!vc.isIndel())
return null;
if (sitesVisited.contains(new Integer(vc.getStart())))
return null;
sitesVisited.add(new Integer(vc.getStart()));
if ( !(priors instanceof DiploidIndelGenotypePriors) )
throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model");
int eventLength = vc.getReference().getBaseString().length() - vc.getAlternateAllele(0).getBaseString().length();
// assume only one alt allele for now
if (eventLength<0)

View File

@ -43,7 +43,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private boolean DEBUGOUT = false;
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private static final double EPS = 1e-300;
private static final double LOGEPS = -300;
protected ExactAFCalculationModel(int N, Logger logger, PrintStream verboseWriter) {
super(N, logger, verboseWriter);
}
@ -60,15 +62,20 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
alleleFrequencyPosteriors = updateAFEstimate(GLs, alleleFrequencyPriors);
double meanAF = computeMeanAF(alleleFrequencyPosteriors);
if (DEBUGOUT)
if (DEBUGOUT) {
double meanAF = computeMeanAF(alleleFrequencyPosteriors);
System.out.format("Mean AF: %5.4f. PVariant: %5.5f\n", meanAF,1.0-alleleFrequencyPosteriors[0]);
}
for (int k=0; k < alleleFrequencyPosteriors.length; k++) {
log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]);
if (alleleFrequencyPosteriors[k] > 1-EPS)
log10AlleleFrequencyPosteriors[k] = -EPS;
else if (alleleFrequencyPosteriors[k] < EPS)
log10AlleleFrequencyPosteriors[k] = LOGEPS;
else
log10AlleleFrequencyPosteriors[k] = Math.log10(alleleFrequencyPosteriors[k]);
}
}
@ -131,6 +138,24 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
/**
* Can be overridden by concrete subclasses
* @param contexts alignment contexts
* @param GLs genotype likelihoods
* @param log10AlleleFrequencyPosteriors allele frequency results
* @param AFofMaxLikelihood allele frequency of max likelihood
*
* @return calls
*/
public Map<String, Genotype> assignGenotypes(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
double[] log10AlleleFrequencyPosteriors,
int AFofMaxLikelihood) {
// overriding implementation in AlleleFrequencyCalculationModel to avoid filling out AF matrix which is not used.
return generateCalls(contexts, GLs, AFofMaxLikelihood);
}
protected Map<String, Genotype> generateCalls(Map<String, StratifiedAlignmentContext> contexts,
Map<String, BiallelicGenotypeLikelihoods> GLs,
int bestAFguess) {
@ -242,4 +267,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return calls;
}
}
}