Enabled multi-allelic SNP discovery in the UG. Needs loads of testing so do not use yet. While working in the UG engine, I removed the extraneous and unnecessary MultiallelicGenotypeLikelihoods class: now a VariantContext with PL-annotated Genotypes is passed around instead. Integration tests pass so it must all work, right?

This commit is contained in:
Eric Banks 2011-12-12 23:02:45 -05:00
parent 5cc1e72fdb
commit e47a113c9f
7 changed files with 183 additions and 324 deletions

View File

@ -1,94 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.variantcontext.Allele;
public class BiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
private Allele A, B;
private int depth;
/**
* Create a new object for sample with given alleles and genotype likelihoods
*
* @param sample sample name
* @param A allele A
* @param B allele B
* @param log10AALikelihoods AA likelihoods
* @param log10ABLikelihoods AB likelihoods
* @param log10BBLikelihoods BB likelihoods
* @param depth the read depth used in creating the likelihoods
*/
public BiallelicGenotypeLikelihoods(String sample,
Allele A,
Allele B,
double log10AALikelihoods,
double log10ABLikelihoods,
double log10BBLikelihoods,
int depth) {
this.sample = sample;
this.A = A;
this.B = B;
this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods};
this.depth = depth;
}
public String getSample() {
return sample;
}
public double getAALikelihoods() {
return GLs[0];
}
public double getABLikelihoods() {
return GLs[1];
}
public double getBBLikelihoods() {
return GLs[2];
}
public double[] getLikelihoods() {
return GLs;
}
public Allele getAlleleA() {
return A;
}
public Allele getAlleleB() {
return B;
}
public int getDepth() {
return depth;
}
}

View File

@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: Aug 4, 2009
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
public enum DiploidGenotype {
AA ('A', 'A'),
AC ('A', 'C'),
@ -110,6 +103,20 @@ public enum DiploidGenotype {
return conversionMatrix[index1][index2];
}
/**
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
* @param baseIndex1 base1
* @param baseIndex2 base2
* @return the diploid genotype
*/
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
if ( baseIndex1 == -1 )
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
if ( baseIndex2 == -1 )
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
return conversionMatrix[baseIndex1][baseIndex2];
}
private static final DiploidGenotype[][] conversionMatrix = {
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },

View File

@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map;
@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param contexts stratified alignment contexts
* @param contextType stratified context type
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB
* @return variant context where genotypes are no-called but with GLs
*/
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup);
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// get deletion length
int dLen = Integer.valueOf(bestAltAllele.substring(1));
// get ref bases of accurate deletion
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart());
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
//System.out.println(new String(ref.getBases()));
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
public Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
if ( tracker == null )
return null;
GenomeLoc loc = ref.getLocus();
Allele refAllele, altAllele;
VariantContext vc = null;
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases);
// start making the VariantContext
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
// initialize the GenotypeLikelihoods
GLs.clear();
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (pileup != null ) {
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
genotypeLikelihoods,
getFilteredDepth(pileup)));
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
if (DEBUG) {
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
return refAllele;
return builder.genotypes(genotypes).make();
}
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// for indels, stop location is one more than ref allele length
boolean hasNullAltAllele = false;
for ( Allele a : alleles ) {
if ( a.isNull() ) {
hasNullAltAllele = true;
break;
}
}
int endLoc = loc.getStart() + refAllele.length();
if( !hasNullAltAllele )
endLoc--;
return endLoc;
}
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
return indelLikelihoodMap.get();

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/1/11
* Time: 10:38 AM
* To change this template use File | Settings | File Templates.
*/
public class MultiallelicGenotypeLikelihoods {
private String sample;
private double[] GLs;
private List<Allele> alleleList;
private int depth;
public MultiallelicGenotypeLikelihoods(String sample,
List<Allele> A,
double[] log10Likelihoods, int depth) {
/* Check for consistency between likelihood vector and number of alleles */
int numAlleles = A.size();
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
this.sample = sample;
this.alleleList = A;
this.GLs = log10Likelihoods;
this.depth = depth;
}
public String getSample() {
return sample;
}
public double[] getLikelihoods() {
return GLs;
}
public List<Allele> getAlleles() {
return alleleList;
}
public int getDepth() {
return depth;
}
}

View File

@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.*;
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
// the alternate allele with the largest sum of quality scores
protected Byte bestAlternateAllele = null;
private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
private boolean ALLOW_MULTIPLE_ALLELES;
private final boolean useAlleleFromVCF;
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
}
public Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
boolean useBAQedPileup) {
if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
byte refBase = ref.getBase();
Allele refAllele = Allele.create(refBase, true);
final boolean[] basesToUse = new boolean[4];
final byte refBase = ref.getBase();
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
// find the alternate allele with the largest sum of quality scores
// start making the VariantContext
final GenomeLoc loc = ref.getLocus();
final List<Allele> alleles = new ArrayList<Allele>();
alleles.add(Allele.create(refBase, true));
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
// find the alternate allele(s) that we should be using
if ( alternateAlleleToUse != null ) {
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
} else if ( useAlleleFromVCF ) {
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a variant
if ( vc == null )
// ignore places where we don't have a SNP
if ( vc == null || !vc.isSNP() )
return null;
if ( !vc.isBiallelic() ) {
// for multi-allelic sites go back to the reads and find the most likely alternate allele
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
} else {
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
}
for ( Allele allele : vc.getAlternateAlleles() )
basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
} else {
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
// how many alternate alleles are we using?
int alleleCounter = countSetBits(basesToUse);
// if there are no non-ref alleles...
if ( alleleCounter == 0 ) {
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return builder.make();
// otherwise, choose any alternate allele (it doesn't really matter)
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
}
}
// if there are no non-ref bases...
if ( bestAlternateAllele == null ) {
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return refAllele;
// otherwise, choose any alternate allele (it doesn't really matter)
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C');
// create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
final int numAltAlleles = countSetBits(basesToUse);
final int[] alleleOrdering = new int[numAltAlleles + 1];
alleleOrdering[0] = indexOfRefBase;
int alleleOrderingIndex = 1;
int numLikelihoods = 1;
for ( int i = 0; i < 4; i++ ) {
if ( i != indexOfRefBase && basesToUse[i] ) {
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
alleleOrdering[alleleOrderingIndex++] = i;
numLikelihoods += alleleOrderingIndex;
}
}
builder.alleles(alleles);
Allele altAllele = Allele.create(bestAlternateAllele, false);
// create the genotypes; no-call everyone for now
GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); }
if ( useBAQedPileup )
pileup = createBAQedPileup( pileup );
// create the GenotypeLikelihoods object
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
if ( nGoodBases == 0 )
continue;
double[] likelihoods = GL.getLikelihoods();
final double[] allLikelihoods = GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods];
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
ArrayList<Allele> aList = new ArrayList<Allele>();
aList.add(refAllele);
aList.add(altAllele);
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
int myLikelihoodsIndex = 0;
for ( int i = 0; i <= numAltAlleles; i++ ) {
for ( int j = i; j <= numAltAlleles; j++ ) {
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
}
}
// normalize in log space so that max element is zero.
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
}
return refAllele;
return builder.genotypes(genotypes).make();
}
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
private int countSetBits(boolean[] array) {
int counter = 0;
for ( int i = 0; i < array.length; i++ ) {
if ( array[i] )
counter++;
}
return counter;
}
// fills in the allelesToUse array
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
int[] qualCounts = new int[4];
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE ))
if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
continue;
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
}
}
// set the non-ref base with maximum quality score sum
int maxCount = 0;
bestAlternateAllele = null;
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] > maxCount ) {
maxCount = qualCounts[index];
bestAlternateAllele = altAllele;
if ( ALLOW_MULTIPLE_ALLELES ) {
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
allelesToUse[index] = true;
}
}
} else {
// set the non-ref base which has the maximum quality score sum
int maxCount = 0;
int indexOfMax = 0;
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( qualCounts[index] > maxCount ) {
maxCount = qualCounts[index];
indexOfMax = index;
}
}
if ( maxCount > 0 )
allelesToUse[indexOfMax] = true;
}
}

View File

@ -219,14 +219,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
if ( refAllele != null )
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
else
return null;
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -261,40 +254,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vc, false);
}
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
// no-call everyone for now
List<Allele> noCall = new ArrayList<Allele>();
noCall.add(Allele.NO_CALL);
Set<Allele> alleles = new LinkedHashSet<Allele>();
alleles.add(refAllele);
boolean addedAltAlleles = false;
GenotypesContext genotypes = GenotypesContext.create();
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
if ( !addedAltAlleles ) {
addedAltAlleles = true;
// ordering important to maintain consistency
for (Allele a: GL.getAlleles()) {
alleles.add(a);
}
}
HashMap<String, Object> attributes = new HashMap<String, Object>();
//GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods());
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
}
GenomeLoc loc = refContext.getLocus();
int endLoc = calculateEndPos(alleles, refAllele, loc);
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
}
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
return calculateGenotypes(null, null, null, null, vc, model);
}
@ -494,42 +453,6 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
boolean isSNP = true, hasNullAltAllele = false;
for (Allele a : alleles){
if (a.length() != 1) {
isSNP = false;
break;
}
}
for (Allele a : alleles){
if (a.isNull()) {
hasNullAltAllele = true;
break;
}
}
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
// standard insertion: ref allele length = 0, endLos = startLoc
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
//
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
int endLoc = loc.getStart();
if ( !isSNP) {
endLoc += refAllele.length();
if(!hasNullAltAllele)
endLoc--;
}
return endLoc;
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
Map<String, AlignmentContext> stratifiedContexts = null;