From e47a113c9f18bc75564091554517a297ff9e2ca1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 12 Dec 2011 23:02:45 -0500 Subject: [PATCH] Enabled multi-allelic SNP discovery in the UG. Needs loads of testing so do not use yet. While working in the UG engine, I removed the extraneous and unnecessary MultiallelicGenotypeLikelihoods class: now a VariantContext with PL-annotated Genotypes is passed around instead. Integration tests pass so it must all work, right? --- .../BiallelicGenotypeLikelihoods.java | 94 --------- .../walkers/genotyper/DiploidGenotype.java | 21 +- .../GenotypeLikelihoodsCalculationModel.java | 19 +- ...elGenotypeLikelihoodsCalculationModel.java | 60 ++++-- .../MultiallelicGenotypeLikelihoods.java | 52 ----- ...NPGenotypeLikelihoodsCalculationModel.java | 182 ++++++++++++------ .../genotyper/UnifiedGenotyperEngine.java | 79 +------- 7 files changed, 183 insertions(+), 324 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java deleted file mode 100644 index fbd9c1dbf..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.variantcontext.Allele; - -public class BiallelicGenotypeLikelihoods { - - private String sample; - private double[] GLs; - private Allele A, B; - private int depth; - - /** - * Create a new object for sample with given alleles and genotype likelihoods - * - * @param sample sample name - * @param A allele A - * @param B allele B - * @param log10AALikelihoods AA likelihoods - * @param log10ABLikelihoods AB likelihoods - * @param log10BBLikelihoods BB likelihoods - * @param depth the read depth used in creating the likelihoods - */ - public BiallelicGenotypeLikelihoods(String sample, - Allele A, - Allele B, - double log10AALikelihoods, - double log10ABLikelihoods, - double log10BBLikelihoods, - int depth) { - this.sample = sample; - this.A = A; - this.B = B; - this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods}; - this.depth = depth; - } - - public String getSample() { - return sample; - } - - public double getAALikelihoods() { - return GLs[0]; - } - - public double getABLikelihoods() { - return GLs[1]; - } - - public double getBBLikelihoods() { - return GLs[2]; - } - - public double[] getLikelihoods() { - return GLs; - } - - public Allele getAlleleA() { - return A; - } - - public Allele getAlleleB() { - return B; - } - - public int getDepth() { - return depth; - } -} - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java index 106bb1982..09936c112 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java @@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.BaseUtils; -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Aug 4, 2009 - * Time: 6:46:09 PM - * To change this template use File | Settings | File Templates. - */ public enum DiploidGenotype { AA ('A', 'A'), AC ('A', 'C'), @@ -110,6 +103,20 @@ public enum DiploidGenotype { return conversionMatrix[index1][index2]; } + /** + * create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly + * @param baseIndex1 base1 + * @param baseIndex2 base2 + * @return the diploid genotype + */ + public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) { + if ( baseIndex1 == -1 ) + throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character"); + if ( baseIndex2 == -1 ) + throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character"); + return conversionMatrix[baseIndex1][baseIndex2]; + } + private static final DiploidGenotype[][] conversionMatrix = { { DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT }, { DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT }, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 74c55dbfe..b30a25414 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Map; @@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { * @param contexts stratified alignment contexts * @param contextType stratified context type * @param priors priors to use for GLs - * @param GLs hash of sample->GL to fill in * @param alternateAlleleToUse the alternate allele to use, null if not set * @param useBAQedPileup should we use the BAQed pileup or the raw one? - * @return genotype likelihoods per sample for AA, AB, BB + * @return variant context where genotypes are no-called but with GLs */ - public abstract Allele getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Map GLs, - Allele alternateAlleleToUse, - boolean useBAQedPileup); + public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup); protected int getFilteredDepth(ReadBackedPileup pileup) { int count = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 14d647b6d..653a6f6e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // get deletion length int dLen = Integer.valueOf(bestAltAllele.substring(1)); // get ref bases of accurate deletion - int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart()); + int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart(); //System.out.println(new String(ref.getBases())); byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); @@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private final static EnumSet allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED); - public Allele getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Map GLs, - Allele alternateAlleleToUse, - boolean useBAQedPileup) { + public VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup) { if ( tracker == null ) return null; - GenomeLoc loc = ref.getLocus(); Allele refAllele, altAllele; VariantContext vc = null; @@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), ref, hsize, numPrefBases); + // start making the VariantContext + final int endLoc = calculateEndPos(alleleList, refAllele, loc); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); + + // create the genotypes; no-call everyone for now + GenotypesContext genotypes = GenotypesContext.create(); + final List noCall = new ArrayList(); + noCall.add(Allele.NO_CALL); + // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. - // initialize the GenotypeLikelihoods - GLs.clear(); for ( Map.Entry sample : contexts.entrySet() ) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); @@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (pileup != null ) { final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); + GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods); - GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), - alleleList, - genotypeLikelihoods, - getFilteredDepth(pileup))); + HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup)); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); + genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); if (DEBUG) { System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString()); @@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } } - return refAllele; + return builder.genotypes(genotypes).make(); } + private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { + // for indels, stop location is one more than ref allele length + boolean hasNullAltAllele = false; + for ( Allele a : alleles ) { + if ( a.isNull() ) { + hasNullAltAllele = true; + break; + } + } + + int endLoc = loc.getStart() + refAllele.length(); + if( !hasNullAltAllele ) + endLoc--; + + return endLoc; + } public static HashMap> getIndelLikelihoodMap() { return indelLikelihoodMap.get(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java deleted file mode 100755 index 4f378b24a..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; - -import java.util.ArrayList; -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * User: delangel - * Date: 6/1/11 - * Time: 10:38 AM - * To change this template use File | Settings | File Templates. - */ -public class MultiallelicGenotypeLikelihoods { - private String sample; - private double[] GLs; - private List alleleList; - private int depth; - - public MultiallelicGenotypeLikelihoods(String sample, - List A, - double[] log10Likelihoods, int depth) { - /* Check for consistency between likelihood vector and number of alleles */ - int numAlleles = A.size(); - if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2) - throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!")); - - this.sample = sample; - this.alleleList = A; - this.GLs = log10Likelihoods; - this.depth = depth; - } - - public String getSample() { - return sample; - } - - public double[] getLikelihoods() { - return GLs; - } - - public List getAlleles() { - return alleleList; - } - - public int getDepth() { - return depth; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 9bdc754e9..4087443f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; +import java.util.*; public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { - // the alternate allele with the largest sum of quality scores - protected Byte bestAlternateAllele = null; + private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50; + + private boolean ALLOW_MULTIPLE_ALLELES; private final boolean useAlleleFromVCF; protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); + ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC; useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; } - public Allele getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Map GLs, - Allele alternateAlleleToUse, - boolean useBAQedPileup) { + public VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup) { if ( !(priors instanceof DiploidSNPGenotypePriors) ) throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); - byte refBase = ref.getBase(); - Allele refAllele = Allele.create(refBase, true); + final boolean[] basesToUse = new boolean[4]; + final byte refBase = ref.getBase(); + final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase); - // find the alternate allele with the largest sum of quality scores + // start making the VariantContext + final GenomeLoc loc = ref.getLocus(); + final List alleles = new ArrayList(); + alleles.add(Allele.create(refBase, true)); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles); + + // find the alternate allele(s) that we should be using if ( alternateAlleleToUse != null ) { - bestAlternateAllele = alternateAlleleToUse.getBases()[0]; + basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true; } else if ( useAlleleFromVCF ) { - VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); + final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); - // ignore places where we don't have a variant - if ( vc == null ) + // ignore places where we don't have a SNP + if ( vc == null || !vc.isSNP() ) return null; - if ( !vc.isBiallelic() ) { - // for multi-allelic sites go back to the reads and find the most likely alternate allele - initializeBestAlternateAllele(refBase, contexts, useBAQedPileup); - } else { - bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0]; - } + for ( Allele allele : vc.getAlternateAlleles() ) + basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true; } else { - initializeBestAlternateAllele(refBase, contexts, useBAQedPileup); + + determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup); + + // how many alternate alleles are we using? + int alleleCounter = countSetBits(basesToUse); + + // if there are no non-ref alleles... + if ( alleleCounter == 0 ) { + // if we only want variants, then we don't need to calculate genotype likelihoods + if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) + return builder.make(); + + // otherwise, choose any alternate allele (it doesn't really matter) + basesToUse[indexOfRefBase == 0 ? 1 : 0] = true; + } } - // if there are no non-ref bases... - if ( bestAlternateAllele == null ) { - // if we only want variants, then we don't need to calculate genotype likelihoods - if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) - return refAllele; - - // otherwise, choose any alternate allele (it doesn't really matter) - bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C'); + // create the alternate alleles and the allele ordering (the ordering is crucial for the GLs) + final int numAltAlleles = countSetBits(basesToUse); + final int[] alleleOrdering = new int[numAltAlleles + 1]; + alleleOrdering[0] = indexOfRefBase; + int alleleOrderingIndex = 1; + int numLikelihoods = 1; + for ( int i = 0; i < 4; i++ ) { + if ( i != indexOfRefBase && basesToUse[i] ) { + alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false)); + alleleOrdering[alleleOrderingIndex++] = i; + numLikelihoods += alleleOrderingIndex; + } } + builder.alleles(alleles); - Allele altAllele = Allele.create(bestAlternateAllele, false); + // create the genotypes; no-call everyone for now + GenotypesContext genotypes = GenotypesContext.create(); + final List noCall = new ArrayList(); + noCall.add(Allele.NO_CALL); for ( Map.Entry sample : contexts.entrySet() ) { ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); - if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); } + if ( useBAQedPileup ) + pileup = createBAQedPileup( pileup ); // create the GenotypeLikelihoods object - DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); - int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); + final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); + final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); if ( nGoodBases == 0 ) continue; - double[] likelihoods = GL.getLikelihoods(); + final double[] allLikelihoods = GL.getLikelihoods(); + final double[] myLikelihoods = new double[numLikelihoods]; - DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase); - DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele); - DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele); - ArrayList aList = new ArrayList(); - aList.add(refAllele); - aList.add(altAllele); - double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ; + int myLikelihoodsIndex = 0; + for ( int i = 0; i <= numAltAlleles; i++ ) { + for ( int j = i; j <= numAltAlleles; j++ ) { + myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()]; + } + } // normalize in log space so that max element is zero. - GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), - aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup))); + GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); + + HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup)); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); + genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); } - return refAllele; + return builder.genotypes(genotypes).make(); } - protected void initializeBestAlternateAllele(byte ref, Map contexts, boolean useBAQedPileup) { + private int countSetBits(boolean[] array) { + int counter = 0; + for ( int i = 0; i < array.length; i++ ) { + if ( array[i] ) + counter++; + } + return counter; + } + + // fills in the allelesToUse array + protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map contexts, boolean useBAQedPileup) { int[] qualCounts = new int[4]; for ( Map.Entry sample : contexts.entrySet() ) { @@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup(); for ( PileupElement p : pileup ) { // ignore deletions - if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE )) + if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) ) continue; final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase()); @@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC } } - // set the non-ref base with maximum quality score sum - int maxCount = 0; - bestAlternateAllele = null; - for ( byte altAllele : BaseUtils.BASES ) { - if ( altAllele == ref ) - continue; - int index = BaseUtils.simpleBaseToBaseIndex(altAllele); - if ( qualCounts[index] > maxCount ) { - maxCount = qualCounts[index]; - bestAlternateAllele = altAllele; + if ( ALLOW_MULTIPLE_ALLELES ) { + for ( byte altAllele : BaseUtils.BASES ) { + if ( altAllele == ref ) + continue; + int index = BaseUtils.simpleBaseToBaseIndex(altAllele); + if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) { + allelesToUse[index] = true; + } } + } else { + // set the non-ref base which has the maximum quality score sum + int maxCount = 0; + int indexOfMax = 0; + for ( byte altAllele : BaseUtils.BASES ) { + if ( altAllele == ref ) + continue; + int index = BaseUtils.simpleBaseToBaseIndex(altAllele); + if ( qualCounts[index] > maxCount ) { + maxCount = qualCounts[index]; + indexOfMax = index; + } + } + + if ( maxCount > 0 ) + allelesToUse[indexOfMax] = true; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3a86743de..21aaeffba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -219,14 +219,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - Map GLs = new HashMap(); - - Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine); - - if ( refAllele != null ) - return createVariantContextFromLikelihoods(refContext, refAllele, GLs); - else - return null; + return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -261,40 +254,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vc, false); } - private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map GLs) { - // no-call everyone for now - List noCall = new ArrayList(); - noCall.add(Allele.NO_CALL); - - Set alleles = new LinkedHashSet(); - alleles.add(refAllele); - boolean addedAltAlleles = false; - - GenotypesContext genotypes = GenotypesContext.create(); - for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) { - if ( !addedAltAlleles ) { - addedAltAlleles = true; - // ordering important to maintain consistency - for (Allele a: GL.getAlleles()) { - alleles.add(a); - } - } - - HashMap attributes = new HashMap(); - //GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods()); - GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods()); - attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth()); - attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); - - genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); - } - - GenomeLoc loc = refContext.getLocus(); - int endLoc = calculateEndPos(alleles, refAllele, loc); - - return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make(); - } - public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { return calculateGenotypes(null, null, null, null, vc, model); } @@ -494,42 +453,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } - private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { - // TODO - temp fix until we can deal with extended events properly - // for indels, stop location is one more than ref allele length - boolean isSNP = true, hasNullAltAllele = false; - for (Allele a : alleles){ - if (a.length() != 1) { - isSNP = false; - break; - } - } - for (Allele a : alleles){ - if (a.isNull()) { - hasNullAltAllele = true; - break; - } - } - // standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null - // standard insertion: ref allele length = 0, endLos = startLoc - // mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length - // So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less - // - // todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler, - // the whole vc processing infrastructure seems too brittle and riddled with special case handling - - - int endLoc = loc.getStart(); - if ( !isSNP) { - endLoc += refAllele.length(); - if(!hasNullAltAllele) - endLoc--; - - } - - return endLoc; - } - private Map getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { Map stratifiedContexts = null;