diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 57cc5594a..eee89674a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -46,8 +46,6 @@ import java.util.*; public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { - private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50; - private boolean ALLOW_MULTIPLE_ALLELES; private final boolean useAlleleFromVCF; @@ -56,15 +54,19 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC super(UAC, logger); ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC; useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; + + // make sure the PL cache has been initialized with enough alleles + if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null || UnifiedGenotyperEngine.PLIndexToAlleleIndex.length < 4 ) // +1 for 0 alt alleles + UnifiedGenotyperEngine.calculatePLcache(3); } - public VariantContext getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Allele alternateAlleleToUse, - boolean useBAQedPileup) { + public VariantContext getLikelihoods(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final Map contexts, + final AlignmentContextUtils.ReadOrientation contextType, + final GenotypePriors priors, + final Allele alternateAlleleToUse, + final boolean useBAQedPileup) { if ( !(priors instanceof DiploidSNPGenotypePriors) ) throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); @@ -79,6 +81,20 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC alleles.add(Allele.create(refBase, true)); final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles); + // calculate the GLs + ArrayList GLs = new ArrayList(contexts.size()); + for ( Map.Entry sample : contexts.entrySet() ) { + ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); + if ( useBAQedPileup ) + pileup = createBAQedPileup( pileup ); + + // create the GenotypeLikelihoods object + final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); + final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); + if ( nGoodBases > 0 ) + GLs.add(new SampleGenotypeData(sample.getKey(), GL, getFilteredDepth(pileup))); + } + // find the alternate allele(s) that we should be using if ( alternateAlleleToUse != null ) { basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true; @@ -93,7 +109,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true; } else { - determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup); + determineAlternateAlleles(basesToUse, refBase, GLs); // how many alternate alleles are we using? int alleleCounter = Utils.countSetBits(basesToUse); @@ -125,22 +141,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC builder.alleles(alleles); // create the genotypes; no-call everyone for now - GenotypesContext genotypes = GenotypesContext.create(); + final GenotypesContext genotypes = GenotypesContext.create(); final List noCall = new ArrayList(); noCall.add(Allele.NO_CALL); - for ( Map.Entry sample : contexts.entrySet() ) { - ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); - if ( useBAQedPileup ) - pileup = createBAQedPileup( pileup ); - - // create the GenotypeLikelihoods object - final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); - final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); - if ( nGoodBases == 0 ) - continue; - - final double[] allLikelihoods = GL.getLikelihoods(); + for ( SampleGenotypeData sampleData : GLs ) { + final double[] allLikelihoods = sampleData.GL.getLikelihoods(); final double[] myLikelihoods = new double[numLikelihoods]; int myLikelihoodsIndex = 0; @@ -151,60 +157,48 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC } // normalize in log space so that max element is zero. - GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); + final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); - HashMap attributes = new HashMap(); - attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup)); + final HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, sampleData.depth); attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); - genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); + genotypes.add(new Genotype(sampleData.name, noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); } return builder.genotypes(genotypes).make(); } // fills in the allelesToUse array - protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map contexts, boolean useBAQedPileup) { - int[] qualCounts = new int[4]; + protected void determineAlternateAlleles(final boolean[] allelesToUse, final byte ref, final List sampleDataList) { - for ( Map.Entry sample : contexts.entrySet() ) { - // calculate the sum of quality scores for each base - ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup(); - for ( PileupElement p : pileup ) { - // ignore deletions - if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) ) - continue; + final int baseIndexOfRef = BaseUtils.simpleBaseToBaseIndex(ref); + final int PLindexOfRef = DiploidGenotype.createDiploidGenotype(ref, ref).ordinal(); + final double[] likelihoodCounts = new double[4]; - final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase()); - if ( index >= 0 ) { - qualCounts[index] += p.getQual(); - } + // based on the GLs, find the alternate alleles with the most probability + for ( SampleGenotypeData sampleData : sampleDataList ) { + final double[] likelihoods = sampleData.GL.getLikelihoods(); + final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); + if ( PLindexOfBestGL != PLindexOfRef ) { + int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL]; + if ( alleles[0] != baseIndexOfRef ) + likelihoodCounts[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; + // don't double-count it + if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] ) + likelihoodCounts[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; } } if ( ALLOW_MULTIPLE_ALLELES ) { - for ( byte altAllele : BaseUtils.BASES ) { - if ( altAllele == ref ) - continue; - int index = BaseUtils.simpleBaseToBaseIndex(altAllele); - if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) { - allelesToUse[index] = true; + for ( int i = 0; i < 4; i++ ) { + if ( likelihoodCounts[i] > 0.0 ) { + allelesToUse[i] = true; } } } else { - // set the non-ref base which has the maximum quality score sum - int maxCount = 0; - int indexOfMax = 0; - for ( byte altAllele : BaseUtils.BASES ) { - if ( altAllele == ref ) - continue; - int index = BaseUtils.simpleBaseToBaseIndex(altAllele); - if ( qualCounts[index] > maxCount ) { - maxCount = qualCounts[index]; - indexOfMax = index; - } - } - - if ( maxCount > 0 ) + // set the non-ref base which has the maximum sum of non-ref GLs + final int indexOfMax = MathUtils.maxElementIndex(likelihoodCounts); + if ( likelihoodCounts[indexOfMax] > 0.0 ) allelesToUse[indexOfMax] = true; } } @@ -227,4 +221,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC public byte getQual( final int offset ) { return BAQ.calcBAQFromTag(getRead(), offset, true); } } -} \ No newline at end of file + private static class SampleGenotypeData { + + public final String name; + public final DiploidSNPGenotypeLikelihoods GL; + public final int depth; + + public SampleGenotypeData(final String name, final DiploidSNPGenotypeLikelihoods GL, final int depth) { + this.name = name; + this.GL = GL; + this.depth = depth; + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 3c6131d6c..d4518078b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testOutputParameter() { HashMap e = new HashMap(); e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "42e4ea7878ef8d96215accb3ba4e97b7" ); - e.put( "--output_mode EMIT_ALL_SITES", "e0443c720149647469f2a2f3fb73942f" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "553f6b4cbf380885bec9dd634cf68742" ); + e.put( "--output_mode EMIT_ALL_SITES", "6d8624e45ad9dae5803ac705b39e4ffa" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(