The Exact model now subsets the VC to the first N alleles when the VC contains more than the maximum number of alleles (instead of throwing it out completely as it did previously). [Perhaps the culling should be done by the UG engine? But theoretically the Exact model can be called outside of the UG and we'd still want the context subsetted.]
This commit is contained in:
parent
2f800b078c
commit
0f728a0604
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -41,10 +41,11 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
|
|
||||||
public enum Model {
|
public enum Model {
|
||||||
/** The default model with the best performance in all cases */
|
/** The default model with the best performance in all cases */
|
||||||
EXACT,
|
EXACT
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int N;
|
protected int N;
|
||||||
|
protected int MAX_ALTERNATE_ALLELES_TO_GENOTYPE;
|
||||||
|
|
||||||
protected Logger logger;
|
protected Logger logger;
|
||||||
protected PrintStream verboseWriter;
|
protected PrintStream verboseWriter;
|
||||||
|
|
@ -53,20 +54,21 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
|
|
||||||
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected AlleleFrequencyCalculationModel(final UnifiedArgumentCollection UAC, final int N, final Logger logger, final PrintStream verboseWriter) {
|
||||||
this.N = N;
|
this.N = N;
|
||||||
|
this.MAX_ALTERNATE_ALLELES_TO_GENOTYPE = UAC.MAX_ALTERNATE_ALLELES;
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
this.verboseWriter = verboseWriter;
|
this.verboseWriter = verboseWriter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Must be overridden by concrete subclasses
|
* Must be overridden by concrete subclasses
|
||||||
* @param GLs genotype likelihoods
|
* @param vc variant context with alleles and genotype likelihoods
|
||||||
* @param Alleles Alleles corresponding to GLs
|
|
||||||
* @param log10AlleleFrequencyPriors priors
|
* @param log10AlleleFrequencyPriors priors
|
||||||
* @param result (pre-allocated) object to store likelihoods results
|
* @param result (pre-allocated) object to store likelihoods results
|
||||||
|
* @return the alleles used for genotyping
|
||||||
*/
|
*/
|
||||||
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
|
protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
|
||||||
double[][] log10AlleleFrequencyPriors,
|
final double[][] log10AlleleFrequencyPriors,
|
||||||
AlleleFrequencyCalculationResult result);
|
final AlleleFrequencyCalculationResult result);
|
||||||
}
|
}
|
||||||
|
|
@ -43,14 +43,28 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getLog10PNonRef(final GenotypesContext GLs,
|
public List<Allele> getLog10PNonRef(final VariantContext vc,
|
||||||
final List<Allele> alleles,
|
final double[][] log10AlleleFrequencyPriors,
|
||||||
final double[][] log10AlleleFrequencyPriors,
|
final AlleleFrequencyCalculationResult result) {
|
||||||
final AlleleFrequencyCalculationResult result) {
|
|
||||||
final int numAlleles = alleles.size();
|
final GenotypesContext GLs = vc.getGenotypes();
|
||||||
|
List<Allele> alleles = vc.getAlleles();
|
||||||
|
|
||||||
|
// don't try to genotype too many alternate alleles
|
||||||
|
if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
|
||||||
|
logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||||
|
|
||||||
|
alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
||||||
|
alleles.add(vc.getReference());
|
||||||
|
for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ )
|
||||||
|
alleles.add(vc.getAlternateAllele(i));
|
||||||
|
UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
|
||||||
|
}
|
||||||
|
|
||||||
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, false);
|
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result, false);
|
||||||
|
|
||||||
|
return alleles;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||||
|
|
|
||||||
|
|
@ -295,12 +295,6 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
|
AlleleFrequencyCalculationResult AFresult = alleleFrequencyCalculationResult.get();
|
||||||
|
|
||||||
// don't try to genotype too many alternate alleles
|
|
||||||
if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) {
|
|
||||||
logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument");
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// estimate our confidence in a reference call and return
|
// estimate our confidence in a reference call and return
|
||||||
if ( vc.getNSamples() == 0 ) {
|
if ( vc.getNSamples() == 0 ) {
|
||||||
if ( limitedContext )
|
if ( limitedContext )
|
||||||
|
|
@ -313,25 +307,32 @@ public class UnifiedGenotyperEngine {
|
||||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
List<Allele> allelesUsedInGenotyping = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model), AFresult);
|
||||||
|
|
||||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||||
boolean bestGuessIsRef = true;
|
boolean bestGuessIsRef = true;
|
||||||
|
|
||||||
// determine which alternate alleles have AF>0
|
// determine which alternate alleles have AF>0
|
||||||
boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
|
final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
|
||||||
|
myAlleles.add(vc.getReference());
|
||||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||||
int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[i]);
|
final Allele alternateAllele = vc.getAlternateAllele(i);
|
||||||
|
final int indexOfAllele = allelesUsedInGenotyping.indexOf(alternateAllele);
|
||||||
|
// the genotyping model may have stripped it out
|
||||||
|
if ( indexOfAllele == -1 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
int indexOfBestAC = MathUtils.maxElementIndex(AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1]);
|
||||||
|
|
||||||
// if the most likely AC is not 0, then this is a good alternate allele to use;
|
// if the most likely AC is not 0, then this is a good alternate allele to use;
|
||||||
// make sure to test against log10PosteriorOfAFzero since that no longer is an entry in the array
|
// make sure to test against log10PosteriorOfAFzero since that no longer is an entry in the array
|
||||||
if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[i][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
|
if ( indexOfBestAC != 0 && AFresult.log10AlleleFrequencyPosteriors[indexOfAllele-1][indexOfBestAC] > AFresult.log10PosteriorOfAFzero ) {
|
||||||
altAllelesToUse[i] = true;
|
myAlleles.add(alternateAllele);
|
||||||
bestGuessIsRef = false;
|
bestGuessIsRef = false;
|
||||||
}
|
}
|
||||||
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
||||||
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
altAllelesToUse[i] = true;
|
myAlleles.add(alternateAllele);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -367,20 +368,6 @@ public class UnifiedGenotyperEngine {
|
||||||
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF);
|
||||||
}
|
}
|
||||||
|
|
||||||
// strip out any alleles that aren't going to be used in the VariantContext
|
|
||||||
final List<Allele> myAlleles;
|
|
||||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
|
|
||||||
myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
|
|
||||||
myAlleles.add(vc.getReference());
|
|
||||||
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
|
||||||
if ( altAllelesToUse[i] )
|
|
||||||
myAlleles.add(vc.getAlternateAllele(i));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// use all of the alleles if we are given them by the user
|
|
||||||
myAlleles = vc.getAlleles();
|
|
||||||
}
|
|
||||||
|
|
||||||
// start constructing the resulting VC
|
// start constructing the resulting VC
|
||||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
|
final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
|
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles);
|
||||||
|
|
@ -394,7 +381,7 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the genotypes
|
// create the genotypes
|
||||||
final GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
|
final GenotypesContext genotypes = subsetAlleles(vc, myAlleles, true);
|
||||||
|
|
||||||
// print out stats if we have a writer
|
// print out stats if we have a writer
|
||||||
if ( verboseWriter != null && !limitedContext )
|
if ( verboseWriter != null && !limitedContext )
|
||||||
|
|
@ -414,7 +401,7 @@ public class UnifiedGenotyperEngine {
|
||||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
afcm.get().getLog10PNonRef(vcOverall, getAlleleFrequencyPriors(model), AFresult);
|
||||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||||
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
double overallLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
||||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||||
|
|
@ -423,7 +410,7 @@ public class UnifiedGenotyperEngine {
|
||||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||||
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
|
double forwardLog10PofNull = AFresult.log10PosteriorOfAFzero;
|
||||||
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
double forwardLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
||||||
|
|
@ -433,7 +420,7 @@ public class UnifiedGenotyperEngine {
|
||||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
clearAFarray(AFresult.log10AlleleFrequencyLikelihoods);
|
||||||
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
clearAFarray(AFresult.log10AlleleFrequencyPosteriors);
|
||||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), AFresult);
|
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||||
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
|
double reverseLog10PofNull = AFresult.log10PosteriorOfAFzero;
|
||||||
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
double reverseLog10PofF = MathUtils.log10sumLog10(AFresult.log10AlleleFrequencyPosteriors[0], 0);
|
||||||
|
|
@ -772,30 +759,36 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param vc variant context with genotype likelihoods
|
* @param vc variant context with genotype likelihoods
|
||||||
* @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
|
|
||||||
* @return genotypes
|
* @return genotypes
|
||||||
*/
|
*/
|
||||||
public static GenotypesContext assignGenotypes(final VariantContext vc,
|
public static GenotypesContext assignGenotypes(final VariantContext vc) {
|
||||||
final boolean[] allelesToUse) {
|
return subsetAlleles(vc, vc.getAlleles(), true);
|
||||||
|
}
|
||||||
|
|
||||||
// the no-called genotypes
|
/**
|
||||||
final GenotypesContext GLs = vc.getGenotypes();
|
* @param vc variant context with genotype likelihoods
|
||||||
|
* @param allelesToUse which alleles from the vc are okay to use
|
||||||
|
* @param assignGenotypes true if we should change the genotypes based on the (subsetted) PLs
|
||||||
|
* @return genotypes
|
||||||
|
*/
|
||||||
|
public static GenotypesContext subsetAlleles(final VariantContext vc,
|
||||||
|
final List<Allele> allelesToUse,
|
||||||
|
final boolean assignGenotypes) {
|
||||||
|
|
||||||
|
// the genotypes with PLs
|
||||||
|
final GenotypesContext oldGTs = vc.getGenotypes();
|
||||||
|
|
||||||
// samples
|
// samples
|
||||||
final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
|
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
|
||||||
|
|
||||||
// the new called genotypes to create
|
// the new genotypes to create
|
||||||
final GenotypesContext calls = GenotypesContext.create();
|
final GenotypesContext newGTs = GenotypesContext.create();
|
||||||
|
|
||||||
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
|
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
|
||||||
final int numOriginalAltAlleles = allelesToUse.length;
|
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||||
final List<Allele> newAlleles = new ArrayList<Allele>(numOriginalAltAlleles+1);
|
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||||
newAlleles.add(vc.getReference());
|
|
||||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
// which PLs should be carried forward?
|
||||||
if ( allelesToUse[i] )
|
|
||||||
newAlleles.add(vc.getAlternateAllele(i));
|
|
||||||
}
|
|
||||||
final int numNewAltAlleles = newAlleles.size() - 1;
|
|
||||||
ArrayList<Integer> likelihoodIndexesToUse = null;
|
ArrayList<Integer> likelihoodIndexesToUse = null;
|
||||||
|
|
||||||
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
||||||
|
|
@ -804,20 +797,27 @@ public class UnifiedGenotyperEngine {
|
||||||
likelihoodIndexesToUse = new ArrayList<Integer>(30);
|
likelihoodIndexesToUse = new ArrayList<Integer>(30);
|
||||||
final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
|
final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
|
||||||
|
|
||||||
|
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
|
||||||
|
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
||||||
|
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
|
||||||
|
altAlleleIndexToUse[i] = true;
|
||||||
|
}
|
||||||
|
|
||||||
for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
|
for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
|
||||||
int[] alleles = PLcache[PLindex];
|
final int[] alleles = PLcache[PLindex];
|
||||||
// consider this entry only if both of the alleles are good
|
// consider this entry only if both of the alleles are good
|
||||||
if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) )
|
if ( (alleles[0] == 0 || altAlleleIndexToUse[alleles[0] - 1]) && (alleles[1] == 0 || altAlleleIndexToUse[alleles[1] - 1]) )
|
||||||
likelihoodIndexesToUse.add(PLindex);
|
likelihoodIndexesToUse.add(PLindex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the new genotypes
|
// create the new genotypes
|
||||||
for ( int k = GLs.size() - 1; k >= 0; k-- ) {
|
for ( int k = 0; k < oldGTs.size(); k++ ) {
|
||||||
final String sample = sampleIndices.get(k);
|
final Genotype g = oldGTs.get(sampleIndices.get(k));
|
||||||
final Genotype g = GLs.get(sample);
|
if ( !g.hasLikelihoods() ) {
|
||||||
if ( !g.hasLikelihoods() )
|
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// create the new likelihoods array from the alleles we are allowed to use
|
// create the new likelihoods array from the alleles we are allowed to use
|
||||||
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
|
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
|
||||||
|
|
@ -834,29 +834,34 @@ public class UnifiedGenotyperEngine {
|
||||||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if there is no mass on the (new) likelihoods and we actually have alternate alleles, then just no-call the sample
|
// if there is no mass on the (new) likelihoods or we weren't asked to assign a genotype, then just no-call the sample
|
||||||
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||||
calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// find the genotype with maximum likelihoods
|
final Genotype newGT = assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles);
|
||||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
newGTs.add(newGT);
|
||||||
int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
|
|
||||||
|
|
||||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
|
||||||
myAlleles.add(newAlleles.get(alleles[0]));
|
|
||||||
myAlleles.add(newAlleles.get(alleles[1]));
|
|
||||||
|
|
||||||
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
|
|
||||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
|
||||||
if ( numNewAltAlleles == 0 )
|
|
||||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
|
||||||
else
|
|
||||||
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
|
||||||
calls.add(new Genotype(sample, myAlleles, qual, null, attrs, false));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return newGTs;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles) {
|
||||||
|
// find the genotype with maximum likelihoods
|
||||||
|
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||||
|
int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
|
||||||
|
|
||||||
return calls;
|
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||||
|
myAlleles.add(allelesToUse.get(alleles[0]));
|
||||||
|
myAlleles.add(allelesToUse.get(alleles[1]));
|
||||||
|
|
||||||
|
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
|
||||||
|
Map<String, Object> attrs = new HashMap<String, Object>(originalGT.getAttributes());
|
||||||
|
if ( numNewAltAlleles == 0 )
|
||||||
|
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||||
|
else
|
||||||
|
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
||||||
|
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue