diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index f9518a35c..d833e9f8e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -47,7 +47,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double[][] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { - final GenotypesContext GLs = vc.getGenotypes(); + GenotypesContext GLs = vc.getGenotypes(); List alleles = vc.getAlleles(); // don't try to genotype too many alternate alleles @@ -58,7 +58,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { alleles.add(vc.getReference()); for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ ) alleles.add(vc.getAlternateAllele(i)); - UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false); + GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false); } //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index aa5776007..c84c944b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -795,6 +795,10 @@ public class UnifiedGenotyperEngine { // then we can keep the PLs as is; otherwise, we determine which ones to keep if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { likelihoodIndexesToUse = new ArrayList(30); + + // make sure that we've cached enough data + if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 ) + calculatePLcache(numOriginalAltAlleles); final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles]; final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; @@ -834,20 +838,29 @@ public class UnifiedGenotyperEngine { newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); } - // if there is no mass on the (new) likelihoods or we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { + // if there is no mass on the (new) likelihoods, then just no-call the sample + if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false)); - continue; } + else { + Map attrs = new HashMap(g.getAttributes()); + if ( numNewAltAlleles == 0 ) + attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); + else + attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods)); - final Genotype newGT = assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles); - newGTs.add(newGT); + // if we weren't asked to assign a genotype, then just no-call the sample + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) + newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false)); + else + newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles, attrs)); + } } return newGTs; } - protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List allelesToUse, int numNewAltAlleles) { + protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List allelesToUse, int numNewAltAlleles, Map attrs) { // find the genotype with maximum likelihoods int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex]; @@ -857,11 +870,6 @@ public class UnifiedGenotyperEngine { myAlleles.add(allelesToUse.get(alleles[1])); final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods); - Map attrs = new HashMap(originalGT.getAttributes()); - if ( numNewAltAlleles == 0 ) - attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - else - attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods)); return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index fd6738123..125242a2f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0")); + Arrays.asList("e4d2904b406f37d99fbe8f52ae75254f")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("1d1956fd7b0f0d30935674b2f5019860")); + Arrays.asList("21f7b6c8b7eaccad1754a832bac79a65")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); }