Implementing reference confidence estimate in UGv2 as per UGv1
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4542 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cf9c9ae241
commit
225cf49128
|
|
@ -79,6 +79,9 @@ public class UnifiedGenotyperEngine {
|
||||||
// the priors object
|
// the priors object
|
||||||
private GenotypePriors genotypePriors;
|
private GenotypePriors genotypePriors;
|
||||||
|
|
||||||
|
// samples in input
|
||||||
|
private Set<String> samples = new TreeSet<String>();
|
||||||
|
|
||||||
// the various loggers and writers
|
// the various loggers and writers
|
||||||
private Logger logger = null;
|
private Logger logger = null;
|
||||||
private PrintStream verboseWriter = null;
|
private PrintStream verboseWriter = null;
|
||||||
|
|
@ -98,16 +101,18 @@ public class UnifiedGenotyperEngine {
|
||||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||||
// get the number of samples
|
// get the number of samples
|
||||||
// if we're supposed to assume a single sample, do so
|
// if we're supposed to assume a single sample, do so
|
||||||
int numSamples;
|
samples = new TreeSet<String>();
|
||||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||||
numSamples = 1;
|
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||||
else
|
else
|
||||||
numSamples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size();
|
samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
||||||
initialize(toolkit, UAC, null, null, null, numSamples);
|
|
||||||
|
initialize(toolkit, UAC, null, null, null, samples.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
|
||||||
initialize(toolkit, UAC, logger, verboseWriter, engine, numSamples);
|
this.samples = new TreeSet<String>(samples);
|
||||||
|
initialize(toolkit, UAC, logger, verboseWriter, engine, samples.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) {
|
||||||
|
|
@ -153,7 +158,7 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// estimate our confidence in a reference call and return
|
// estimate our confidence in a reference call and return
|
||||||
if ( GLs.size() == 0 )
|
if ( GLs.size() == 0 )
|
||||||
return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), false);
|
return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), false, 1.0);
|
||||||
|
|
||||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||||
|
|
@ -194,7 +199,7 @@ public class UnifiedGenotyperEngine {
|
||||||
if ( !UAC.ALL_BASES_MODE && !passesEmitThreshold(phredScaledConfidence, bestAFguess, atTriggerTrack) ) {
|
if ( !UAC.ALL_BASES_MODE && !passesEmitThreshold(phredScaledConfidence, bestAFguess, atTriggerTrack) ) {
|
||||||
// technically, at this point our confidence in a reference call isn't accurately estimated
|
// technically, at this point our confidence in a reference call isn't accurately estimated
|
||||||
// because it didn't take into account samples with no data, so let's get a better estimate
|
// because it didn't take into account samples with no data, so let's get a better estimate
|
||||||
return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), true);
|
return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), true, 1.0 - PofF);
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the genotypes
|
// create the genotypes
|
||||||
|
|
@ -356,26 +361,19 @@ public class UnifiedGenotyperEngine {
|
||||||
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||||
}
|
}
|
||||||
|
|
||||||
private VariantCallContext estimateReferenceConfidence(Map<String, StratifiedAlignmentContext> contexts, double theta, boolean ignoreCoveredSamples) {
|
private VariantCallContext estimateReferenceConfidence(Map<String, StratifiedAlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
|
||||||
|
|
||||||
// TODO: implement me
|
double P_of_ref = initialPofRef;
|
||||||
|
|
||||||
double P_of_ref = 1.0;
|
|
||||||
|
|
||||||
// use the AF=0 prob if it's calculated
|
|
||||||
//if ( ignoreCoveredSamples )
|
|
||||||
// P_of_ref = 1.0 - PofFs[BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele)];
|
|
||||||
|
|
||||||
// for each sample that we haven't examined yet
|
// for each sample that we haven't examined yet
|
||||||
//for ( String sample : samples ) {
|
for ( String sample : samples ) {
|
||||||
// boolean isCovered = contexts.containsKey(sample);
|
boolean isCovered = contexts.containsKey(sample);
|
||||||
// if ( ignoreCoveredSamples && isCovered )
|
if ( ignoreCoveredSamples && isCovered )
|
||||||
// continue;
|
continue;
|
||||||
|
|
||||||
P_of_ref = 0.5;
|
int depth = isCovered ? contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().size() : 0;
|
||||||
// int depth = isCovered ? contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().size() : 0;
|
P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
|
||||||
// P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
|
}
|
||||||
//}
|
|
||||||
|
|
||||||
return new VariantCallContext(QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
return new VariantCallContext(QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -78,9 +78,6 @@ public class UnifiedGenotyperV2 extends LocusWalker<VariantCallContext, UnifiedG
|
||||||
// the annotation engine
|
// the annotation engine
|
||||||
private VariantAnnotatorEngine annotationEngine;
|
private VariantAnnotatorEngine annotationEngine;
|
||||||
|
|
||||||
// samples in input
|
|
||||||
private Set<String> samples = new TreeSet<String>();
|
|
||||||
|
|
||||||
// enable deletions in the pileup
|
// enable deletions in the pileup
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
|
|
@ -118,6 +115,7 @@ public class UnifiedGenotyperV2 extends LocusWalker<VariantCallContext, UnifiedG
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
// get all of the unique sample names
|
// get all of the unique sample names
|
||||||
// if we're supposed to assume a single sample, do so
|
// if we're supposed to assume a single sample, do so
|
||||||
|
Set<String> samples = new TreeSet<String>();
|
||||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||||
else
|
else
|
||||||
|
|
@ -128,7 +126,7 @@ public class UnifiedGenotyperV2 extends LocusWalker<VariantCallContext, UnifiedG
|
||||||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
||||||
|
|
||||||
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
|
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
|
||||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples.size());
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
|
||||||
|
|
||||||
// initialize the header
|
// initialize the header
|
||||||
writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ;
|
writer.writeHeader(new VCFHeader(getHeaderInfo(), samples)) ;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue