Enabled multi-allelic SNP discovery in the UG. Needs loads of testing so do not use yet. While working in the UG engine, I removed the extraneous and unnecessary MultiallelicGenotypeLikelihoods class: now a VariantContext with PL-annotated Genotypes is passed around instead. Integration tests pass so it must all work, right?
This commit is contained in:
parent
5cc1e72fdb
commit
e47a113c9f
|
|
@ -1,94 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
public class BiallelicGenotypeLikelihoods {
|
||||
|
||||
private String sample;
|
||||
private double[] GLs;
|
||||
private Allele A, B;
|
||||
private int depth;
|
||||
|
||||
/**
|
||||
* Create a new object for sample with given alleles and genotype likelihoods
|
||||
*
|
||||
* @param sample sample name
|
||||
* @param A allele A
|
||||
* @param B allele B
|
||||
* @param log10AALikelihoods AA likelihoods
|
||||
* @param log10ABLikelihoods AB likelihoods
|
||||
* @param log10BBLikelihoods BB likelihoods
|
||||
* @param depth the read depth used in creating the likelihoods
|
||||
*/
|
||||
public BiallelicGenotypeLikelihoods(String sample,
|
||||
Allele A,
|
||||
Allele B,
|
||||
double log10AALikelihoods,
|
||||
double log10ABLikelihoods,
|
||||
double log10BBLikelihoods,
|
||||
int depth) {
|
||||
this.sample = sample;
|
||||
this.A = A;
|
||||
this.B = B;
|
||||
this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods};
|
||||
this.depth = depth;
|
||||
}
|
||||
|
||||
public String getSample() {
|
||||
return sample;
|
||||
}
|
||||
|
||||
public double getAALikelihoods() {
|
||||
return GLs[0];
|
||||
}
|
||||
|
||||
public double getABLikelihoods() {
|
||||
return GLs[1];
|
||||
}
|
||||
|
||||
public double getBBLikelihoods() {
|
||||
return GLs[2];
|
||||
}
|
||||
|
||||
public double[] getLikelihoods() {
|
||||
return GLs;
|
||||
}
|
||||
|
||||
public Allele getAlleleA() {
|
||||
return A;
|
||||
}
|
||||
|
||||
public Allele getAlleleB() {
|
||||
return B;
|
||||
}
|
||||
|
||||
public int getDepth() {
|
||||
return depth;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: Aug 4, 2009
|
||||
* Time: 6:46:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public enum DiploidGenotype {
|
||||
AA ('A', 'A'),
|
||||
AC ('A', 'C'),
|
||||
|
|
@ -110,6 +103,20 @@ public enum DiploidGenotype {
|
|||
return conversionMatrix[index1][index2];
|
||||
}
|
||||
|
||||
/**
|
||||
* create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly
|
||||
* @param baseIndex1 base1
|
||||
* @param baseIndex2 base2
|
||||
* @return the diploid genotype
|
||||
*/
|
||||
public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) {
|
||||
if ( baseIndex1 == -1 )
|
||||
throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character");
|
||||
if ( baseIndex2 == -1 )
|
||||
throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character");
|
||||
return conversionMatrix[baseIndex1][baseIndex2];
|
||||
}
|
||||
|
||||
private static final DiploidGenotype[][] conversionMatrix = {
|
||||
{ DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT },
|
||||
{ DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT },
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
* @param contexts stratified alignment contexts
|
||||
* @param contextType stratified context type
|
||||
* @param priors priors to use for GLs
|
||||
* @param GLs hash of sample->GL to fill in
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||
* @return genotype likelihoods per sample for AA, AB, BB
|
||||
* @return variant context where genotypes are no-called but with GLs
|
||||
*/
|
||||
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup);
|
||||
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup);
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
|||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
// get deletion length
|
||||
int dLen = Integer.valueOf(bestAltAllele.substring(1));
|
||||
// get ref bases of accurate deletion
|
||||
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart());
|
||||
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
|
||||
|
||||
//System.out.println(new String(ref.getBases()));
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||
|
|
@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
private final static EnumSet<VariantContext.Type> allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED);
|
||||
|
||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
Allele refAllele, altAllele;
|
||||
VariantContext vc = null;
|
||||
|
|
@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||
ref, hsize, numPrefBases);
|
||||
|
||||
// start making the VariantContext
|
||||
final int endLoc = calculateEndPos(alleleList, refAllele, loc);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase());
|
||||
|
||||
// create the genotypes; no-call everyone for now
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
final List<Allele> noCall = new ArrayList<Allele>();
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
// For each sample, get genotype likelihoods based on pileup
|
||||
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
||||
// initialize the GenotypeLikelihoods
|
||||
GLs.clear();
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
|
@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
if (pileup != null ) {
|
||||
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods);
|
||||
|
||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||
alleleList,
|
||||
genotypeLikelihoods,
|
||||
getFilteredDepth(pileup)));
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString());
|
||||
|
|
@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
return builder.genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean hasNullAltAllele = false;
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.isNull() ) {
|
||||
hasNullAltAllele = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int endLoc = loc.getStart() + refAllele.length();
|
||||
if( !hasNullAltAllele )
|
||||
endLoc--;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static HashMap<PileupElement,LinkedHashMap<Allele,Double>> getIndelLikelihoodMap() {
|
||||
return indelLikelihoodMap.get();
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: delangel
|
||||
* Date: 6/1/11
|
||||
* Time: 10:38 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class MultiallelicGenotypeLikelihoods {
|
||||
private String sample;
|
||||
private double[] GLs;
|
||||
private List<Allele> alleleList;
|
||||
private int depth;
|
||||
|
||||
public MultiallelicGenotypeLikelihoods(String sample,
|
||||
List<Allele> A,
|
||||
double[] log10Likelihoods, int depth) {
|
||||
/* Check for consistency between likelihood vector and number of alleles */
|
||||
int numAlleles = A.size();
|
||||
if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2)
|
||||
throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!"));
|
||||
|
||||
this.sample = sample;
|
||||
this.alleleList = A;
|
||||
this.GLs = log10Likelihoods;
|
||||
this.depth = depth;
|
||||
}
|
||||
|
||||
public String getSample() {
|
||||
return sample;
|
||||
}
|
||||
|
||||
public double[] getLikelihoods() {
|
||||
return GLs;
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles() {
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
public int getDepth() {
|
||||
return depth;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
|
||||
|
||||
// the alternate allele with the largest sum of quality scores
|
||||
protected Byte bestAlternateAllele = null;
|
||||
private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50;
|
||||
|
||||
private boolean ALLOW_MULTIPLE_ALLELES;
|
||||
|
||||
private final boolean useAlleleFromVCF;
|
||||
|
||||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC;
|
||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
}
|
||||
|
||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
public VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
|
||||
if ( !(priors instanceof DiploidSNPGenotypePriors) )
|
||||
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
|
||||
|
||||
byte refBase = ref.getBase();
|
||||
Allele refAllele = Allele.create(refBase, true);
|
||||
final boolean[] basesToUse = new boolean[4];
|
||||
final byte refBase = ref.getBase();
|
||||
final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||
|
||||
// find the alternate allele with the largest sum of quality scores
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(Allele.create(refBase, true));
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles);
|
||||
|
||||
// find the alternate allele(s) that we should be using
|
||||
if ( alternateAlleleToUse != null ) {
|
||||
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
||||
basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true;
|
||||
} else if ( useAlleleFromVCF ) {
|
||||
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||
final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||
|
||||
// ignore places where we don't have a variant
|
||||
if ( vc == null )
|
||||
// ignore places where we don't have a SNP
|
||||
if ( vc == null || !vc.isSNP() )
|
||||
return null;
|
||||
|
||||
if ( !vc.isBiallelic() ) {
|
||||
// for multi-allelic sites go back to the reads and find the most likely alternate allele
|
||||
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
|
||||
} else {
|
||||
bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0];
|
||||
}
|
||||
for ( Allele allele : vc.getAlternateAlleles() )
|
||||
basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true;
|
||||
} else {
|
||||
initializeBestAlternateAllele(refBase, contexts, useBAQedPileup);
|
||||
|
||||
determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);
|
||||
|
||||
// how many alternate alleles are we using?
|
||||
int alleleCounter = countSetBits(basesToUse);
|
||||
|
||||
// if there are no non-ref alleles...
|
||||
if ( alleleCounter == 0 ) {
|
||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||
return builder.make();
|
||||
|
||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||
basesToUse[indexOfRefBase == 0 ? 1 : 0] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// if there are no non-ref bases...
|
||||
if ( bestAlternateAllele == null ) {
|
||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||
return refAllele;
|
||||
|
||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||
bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C');
|
||||
// create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
|
||||
final int numAltAlleles = countSetBits(basesToUse);
|
||||
final int[] alleleOrdering = new int[numAltAlleles + 1];
|
||||
alleleOrdering[0] = indexOfRefBase;
|
||||
int alleleOrderingIndex = 1;
|
||||
int numLikelihoods = 1;
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
if ( i != indexOfRefBase && basesToUse[i] ) {
|
||||
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false));
|
||||
alleleOrdering[alleleOrderingIndex++] = i;
|
||||
numLikelihoods += alleleOrderingIndex;
|
||||
}
|
||||
}
|
||||
builder.alleles(alleles);
|
||||
|
||||
Allele altAllele = Allele.create(bestAlternateAllele, false);
|
||||
// create the genotypes; no-call everyone for now
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
final List<Allele> noCall = new ArrayList<Allele>();
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); }
|
||||
if ( useBAQedPileup )
|
||||
pileup = createBAQedPileup( pileup );
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||
int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error);
|
||||
final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE);
|
||||
if ( nGoodBases == 0 )
|
||||
continue;
|
||||
|
||||
double[] likelihoods = GL.getLikelihoods();
|
||||
final double[] allLikelihoods = GL.getLikelihoods();
|
||||
final double[] myLikelihoods = new double[numLikelihoods];
|
||||
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase);
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele);
|
||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||
aList.add(refAllele);
|
||||
aList.add(altAllele);
|
||||
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
||||
int myLikelihoodsIndex = 0;
|
||||
for ( int i = 0; i <= numAltAlleles; i++ ) {
|
||||
for ( int j = i; j <= numAltAlleles; j++ ) {
|
||||
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
|
||||
}
|
||||
}
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup));
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
return builder.genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
protected void initializeBestAlternateAllele(byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
|
||||
private int countSetBits(boolean[] array) {
|
||||
int counter = 0;
|
||||
for ( int i = 0; i < array.length; i++ ) {
|
||||
if ( array[i] )
|
||||
counter++;
|
||||
}
|
||||
return counter;
|
||||
}
|
||||
|
||||
// fills in the allelesToUse array
|
||||
protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
|
||||
int[] qualCounts = new int[4];
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
|
|
@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE ))
|
||||
if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) )
|
||||
continue;
|
||||
|
||||
final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
|
||||
|
|
@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
}
|
||||
}
|
||||
|
||||
// set the non-ref base with maximum quality score sum
|
||||
int maxCount = 0;
|
||||
bestAlternateAllele = null;
|
||||
for ( byte altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele == ref )
|
||||
continue;
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
if ( qualCounts[index] > maxCount ) {
|
||||
maxCount = qualCounts[index];
|
||||
bestAlternateAllele = altAllele;
|
||||
if ( ALLOW_MULTIPLE_ALLELES ) {
|
||||
for ( byte altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele == ref )
|
||||
continue;
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) {
|
||||
allelesToUse[index] = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// set the non-ref base which has the maximum quality score sum
|
||||
int maxCount = 0;
|
||||
int indexOfMax = 0;
|
||||
for ( byte altAllele : BaseUtils.BASES ) {
|
||||
if ( altAllele == ref )
|
||||
continue;
|
||||
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
|
||||
if ( qualCounts[index] > maxCount ) {
|
||||
maxCount = qualCounts[index];
|
||||
indexOfMax = index;
|
||||
}
|
||||
}
|
||||
|
||||
if ( maxCount > 0 )
|
||||
allelesToUse[indexOfMax] = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -219,14 +219,7 @@ public class UnifiedGenotyperEngine {
|
|||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
Map<String, MultiallelicGenotypeLikelihoods> GLs = new HashMap<String, MultiallelicGenotypeLikelihoods>();
|
||||
|
||||
Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||
|
||||
if ( refAllele != null )
|
||||
return createVariantContextFromLikelihoods(refContext, refAllele, GLs);
|
||||
else
|
||||
return null;
|
||||
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
@ -261,40 +254,6 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vc, false);
|
||||
}
|
||||
|
||||
private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map<String, MultiallelicGenotypeLikelihoods> GLs) {
|
||||
// no-call everyone for now
|
||||
List<Allele> noCall = new ArrayList<Allele>();
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||
alleles.add(refAllele);
|
||||
boolean addedAltAlleles = false;
|
||||
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) {
|
||||
if ( !addedAltAlleles ) {
|
||||
addedAltAlleles = true;
|
||||
// ordering important to maintain consistency
|
||||
for (Allele a: GL.getAlleles()) {
|
||||
alleles.add(a);
|
||||
}
|
||||
}
|
||||
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
//GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
||||
GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth());
|
||||
attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods);
|
||||
|
||||
genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false));
|
||||
}
|
||||
|
||||
GenomeLoc loc = refContext.getLocus();
|
||||
int endLoc = calculateEndPos(alleles, refAllele, loc);
|
||||
|
||||
return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make();
|
||||
}
|
||||
|
||||
public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model);
|
||||
}
|
||||
|
|
@ -494,42 +453,6 @@ public class UnifiedGenotyperEngine {
|
|||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
|
||||
}
|
||||
|
||||
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
|
||||
// TODO - temp fix until we can deal with extended events properly
|
||||
// for indels, stop location is one more than ref allele length
|
||||
boolean isSNP = true, hasNullAltAllele = false;
|
||||
for (Allele a : alleles){
|
||||
if (a.length() != 1) {
|
||||
isSNP = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (Allele a : alleles){
|
||||
if (a.isNull()) {
|
||||
hasNullAltAllele = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null
|
||||
// standard insertion: ref allele length = 0, endLos = startLoc
|
||||
// mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length
|
||||
// So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less
|
||||
//
|
||||
// todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler,
|
||||
// the whole vc processing infrastructure seems too brittle and riddled with special case handling
|
||||
|
||||
|
||||
int endLoc = loc.getStart();
|
||||
if ( !isSNP) {
|
||||
endLoc += refAllele.length();
|
||||
if(!hasNullAltAllele)
|
||||
endLoc--;
|
||||
|
||||
}
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||
|
|
|
|||
Loading…
Reference in New Issue