diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 1f2e71bf9..37c3138d6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -44,9 +44,8 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode } // are we above the lod threshold for emitting calls (and not in all-bases mode)? - if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) { - return new Pair>(null, null); - } + if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestIsRef) || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) + return new Pair>(null, null); // generate the calls List calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 6418915ad..d5a1e26ad 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -51,22 +51,18 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // if there are no non-ref bases... if ( bestAlternateAllele == null ) { // if we don't want all bases, then we can just return - if ( !ALL_BASE_MODE ) + if ( !ALL_BASE_MODE && !GENOTYPE_MODE ) return new Pair>(null, null); - // otherwise, we care about the ref base - bestAlternateAllele = ref; - - // TODO -- figure out what to do here! - + // otherwise, choose any alternate allele (it doesn't really matter) + bestAlternateAllele = (ref != 'A' ? 'A' : 'C'); } - else { - initializeAlleleFrequencies(frequencyEstimationPoints); - initialize(ref, contexts, StratifiedContext.OVERALL); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL); - calculatePofFs(ref, frequencyEstimationPoints); - } + initializeAlleleFrequencies(frequencyEstimationPoints); + + initialize(ref, contexts, StratifiedContext.OVERALL); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedContext.OVERALL); + calculatePofFs(ref, frequencyEstimationPoints); // print out stats if we have a writer if ( verboseWriter != null ) @@ -311,19 +307,30 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected List makeGenotypeCalls(char ref, char alt, HashMap contexts, GenomeLoc loc) { // by default, we return no genotypes return new ArrayList(); - } + } protected Pair> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc, int frequencyEstimationPoints) { // only need to look at the most likely alternate allele int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele); - double phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]); - if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0]; int bestAFguess = Utils.findIndexOfMaxEntry(alleleFrequencyPosteriors[indexOfMax]); + double phredScaledConfidence; + if ( bestAFguess != 0 ) { + phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[indexOfMax][0]); + if ( Double.isInfinite(phredScaledConfidence) ) + phredScaledConfidence = -10.0 * log10PofDgivenAFi[indexOfMax][0]; + } else { + phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofFs[indexOfMax]); + if ( Double.isInfinite(phredScaledConfidence) ) { + double sum = 0.0; + for (int i = 1; i < frequencyEstimationPoints; i++) + sum += log10PofDgivenAFi[indexOfMax][i]; + phredScaledConfidence = -10.0 * sum; + } + } // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero - if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) + if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) return new Pair>(null, null); // populate the sample-specific data @@ -333,8 +340,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // *** note that calculating strand bias involves overwriting data structures, so we do that last VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP); if ( locusdata != null ) { - locusdata.addAlternateAllele(bestAlternateAllele.toString()); - locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); + if ( bestAFguess != 0 ) { + locusdata.addAlternateAllele(bestAlternateAllele.toString()); + locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); + } if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 3880266ec..963bcfd66 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -67,9 +67,8 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation } // are we above the lod threshold for emitting calls (and not in all-bases mode)? - if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) { - return new Pair>(null, null); - } + if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestIsRef) || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) + return new Pair>(null, null); // we can now create the genotype call object GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index a092e2221..08633d5bb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -48,10 +48,10 @@ public class UnifiedArgumentCollection { @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes or just the variants?", required = false) + @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) public boolean GENOTYPE = false; - @Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output nonconfident variant or confident ref calls too?", required = false) + @Argument(fullName = "output_all_callable_bases", shortName = "all_bases", doc = "Should we output all callable bases?", required = false) public boolean ALL_BASES = false; @Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false) diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 4d51b2c91..ec6389eaf 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -101,6 +101,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } + @Test + public void testGenotypeModeJoint() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, + Arrays.asList("6971e0bfa524c0e006b3c3ccef52520a")); + executeTest("testGenotypeMode - Joint Estimate", spec); + } + + @Test + public void testAllBasesModeJoint() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, + Arrays.asList("9f54482c1594bdd1e28b4cf2e51f944f")); + executeTest("testAllBasesMode - Joint Estimate", spec); + } + //@Test //public void testGLF() { // WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(