Fix all-bases-mode and genotype-mode in the UG and add integration tests for them.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2295 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e54b91ce4
commit
40c2d7a4bc
|
|
@ -44,9 +44,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
}
|
||||
|
||||
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
|
||||
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) {
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
}
|
||||
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestIsRef) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
|
||||
// generate the calls
|
||||
List<Genotype> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);
|
||||
|
|
|
|||
|
|
@ -51,22 +51,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
// if there are no non-ref bases...
|
||||
if ( bestAlternateAllele == null ) {
|
||||
// if we don't want all bases, then we can just return
|
||||
if ( !ALL_BASE_MODE )
|
||||
if ( !ALL_BASE_MODE && !GENOTYPE_MODE )
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
|
||||
// otherwise, we care about the ref base
|
||||
bestAlternateAllele = ref;
|
||||
|
||||
// TODO -- figure out what to do here!
|
||||
|
||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||
bestAlternateAllele = (ref != 'A' ? 'A' : 'C');
|
||||
}
|
||||
else {
|
||||
initializeAlleleFrequencies(frequencyEstimationPoints);
|
||||
|
||||
initialize(ref, contexts, StratifiedContext.OVERALL);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
}
|
||||
initializeAlleleFrequencies(frequencyEstimationPoints);
|
||||
|
||||
initialize(ref, contexts, StratifiedContext.OVERALL);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
|
||||
// print out stats if we have a writer
|
||||
if ( verboseWriter != null )
|
||||
|
|
@ -311,19 +307,30 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
protected List<Genotype> makeGenotypeCalls(char ref, char alt, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
|
||||
// by default, we return no genotypes
|
||||
return new ArrayList<Genotype>();
|
||||
}
|
||||
}
|
||||
|
||||
protected Pair<VariationCall, List<Genotype>> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
|
||||
// only need to look at the most likely alternate allele
|
||||
int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
|
||||
|
||||
double phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0];
|
||||
int bestAFguess = Utils.findIndexOfMaxEntry(alleleFrequencyPosteriors[indexOfMax]);
|
||||
double phredScaledConfidence;
|
||||
if ( bestAFguess != 0 ) {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) )
|
||||
phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0];
|
||||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofFs[indexOfMax]);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
double sum = 0.0;
|
||||
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||
sum += log10PofDgivenAFi[indexOfMax][i];
|
||||
phredScaledConfidence = -10.0 * sum;
|
||||
}
|
||||
}
|
||||
|
||||
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
|
||||
if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
|
||||
// populate the sample-specific data
|
||||
|
|
@ -333,8 +340,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);
|
||||
if ( locusdata != null ) {
|
||||
locusdata.addAlternateAllele(bestAlternateAllele.toString());
|
||||
locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
|
||||
if ( bestAFguess != 0 ) {
|
||||
locusdata.addAlternateAllele(bestAlternateAllele.toString());
|
||||
locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
|
||||
}
|
||||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,9 +67,8 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
}
|
||||
|
||||
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
|
||||
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) {
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
}
|
||||
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestIsRef) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
|
||||
// we can now create the genotype call object
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
|
|
|||
|
|
@ -48,10 +48,10 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
|
||||
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes or just the variants?", required = false)
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
|
||||
public boolean GENOTYPE = false;
|
||||
|
||||
@Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output nonconfident variant or confident ref calls too?", required = false)
|
||||
@Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false)
|
||||
public boolean ALL_BASES = false;
|
||||
|
||||
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||
|
|
|
|||
|
|
@ -101,6 +101,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("6971e0bfa524c0e006b3c3ccef52520a"));
|
||||
executeTest("testGenotypeMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllBasesModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("9f54482c1594bdd1e28b4cf2e51f944f"));
|
||||
executeTest("testAllBasesMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
//@Test
|
||||
//public void testGLF() {
|
||||
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
Loading…
Reference in New Issue