Fix all-bases-mode and genotype-mode in the UG and add integration tests for them.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2295 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-09 17:41:30 +00:00
parent 4e54b91ce4
commit 40c2d7a4bc
5 changed files with 50 additions and 27 deletions

View File

@ -44,9 +44,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
} }
// are we above the lod threshold for emitting calls (and not in all-bases mode)? // are we above the lod threshold for emitting calls (and not in all-bases mode)?
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) { if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestIsRef) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<VariationCall, List<Genotype>>(null, null); return new Pair<VariationCall, List<Genotype>>(null, null);
}
// generate the calls // generate the calls
List<Genotype> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts); List<Genotype> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);

View File

@ -51,22 +51,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// if there are no non-ref bases... // if there are no non-ref bases...
if ( bestAlternateAllele == null ) { if ( bestAlternateAllele == null ) {
// if we don't want all bases, then we can just return // if we don't want all bases, then we can just return
if ( !ALL_BASE_MODE ) if ( !ALL_BASE_MODE && !GENOTYPE_MODE )
return new Pair<VariationCall, List<Genotype>>(null, null); return new Pair<VariationCall, List<Genotype>>(null, null);
// otherwise, we care about the ref base // otherwise, choose any alternate allele (it doesn't really matter)
bestAlternateAllele = ref; bestAlternateAllele = (ref != 'A' ? 'A' : 'C');
// TODO -- figure out what to do here!
} }
else {
initializeAlleleFrequencies(frequencyEstimationPoints);
initialize(ref, contexts, StratifiedContext.OVERALL); initializeAlleleFrequencies(frequencyEstimationPoints);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL);
calculatePofFs(ref, frequencyEstimationPoints); initialize(ref, contexts, StratifiedContext.OVERALL);
} calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL);
calculatePofFs(ref, frequencyEstimationPoints);
// print out stats if we have a writer // print out stats if we have a writer
if ( verboseWriter != null ) if ( verboseWriter != null )
@ -317,13 +313,24 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// only need to look at the most likely alternate allele // only need to look at the most likely alternate allele
int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele); int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
double phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0];
int bestAFguess = Utils.findIndexOfMaxEntry(alleleFrequencyPosteriors[indexOfMax]); int bestAFguess = Utils.findIndexOfMaxEntry(alleleFrequencyPosteriors[indexOfMax]);
double phredScaledConfidence;
if ( bestAFguess != 0 ) {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]);
if ( Double.isInfinite(phredScaledConfidence) )
phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0];
} else {
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofFs[indexOfMax]);
if ( Double.isInfinite(phredScaledConfidence) ) {
double sum = 0.0;
for (int i = 1; i < frequencyEstimationPoints; i++)
sum += log10PofDgivenAFi[indexOfMax][i];
phredScaledConfidence = -10.0 * sum;
}
}
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<VariationCall, List<Genotype>>(null, null); return new Pair<VariationCall, List<Genotype>>(null, null);
// populate the sample-specific data // populate the sample-specific data
@ -333,8 +340,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// *** note that calculating strand bias involves overwriting data structures, so we do that last // *** note that calculating strand bias involves overwriting data structures, so we do that last
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP); VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);
if ( locusdata != null ) { if ( locusdata != null ) {
locusdata.addAlternateAllele(bestAlternateAllele.toString()); if ( bestAFguess != 0 ) {
locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); locusdata.addAlternateAllele(bestAlternateAllele.toString());
locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
}
if ( locusdata instanceof ConfidenceBacked ) { if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
} }

View File

@ -67,9 +67,8 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
} }
// are we above the lod threshold for emitting calls (and not in all-bases mode)? // are we above the lod threshold for emitting calls (and not in all-bases mode)?
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) { if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestIsRef) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<VariationCall, List<Genotype>>(null, null); return new Pair<VariationCall, List<Genotype>>(null, null);
}
// we can now create the genotype call object // we can now create the genotype call object
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());

View File

@ -48,10 +48,10 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes or just the variants?", required = false) @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public boolean GENOTYPE = false; public boolean GENOTYPE = false;
@Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output nonconfident variant or confident ref calls too?", required = false) @Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false)
public boolean ALL_BASES = false; public boolean ALL_BASES = false;
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false) @Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)

View File

@ -101,6 +101,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
executeTest("testSingleSamplePilot2 - Joint Estimate", spec); executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
} }
@Test
public void testGenotypeModeJoint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
Arrays.asList("6971e0bfa524c0e006b3c3ccef52520a"));
executeTest("testGenotypeMode - Joint Estimate", spec);
}
@Test
public void testAllBasesModeJoint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
Arrays.asList("9f54482c1594bdd1e28b4cf2e51f944f"));
executeTest("testAllBasesMode - Joint Estimate", spec);
}
//@Test //@Test
//public void testGLF() { //public void testGLF() {
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( // WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(