gVCF: emit <NON_REF> in all VCF lines, including variant ones, when -ERC gVCF is requested.

Changes:
-------

  <NON_REF> likelihood in variant sites is calculated as the maximum possible likelihood for an unseen alternative allele: for each read it is calculated as the second best likelihood amongst the reported alleles.

  When -ERC gVCF is requested, stand_conf_emit and stand_conf_call are forcefully set to 0. Also dontGenotype is set to false for consistency's sake.

  Integration test MD5s have been changed accordingly.

Additional fix:
--------------

  Especially after adding the <NON_REF> allele, but this also happened without it, QUAL values tend to go to 0 (a very large integer number in log 10 space) due to underflow when combining GLs (GenotypingEngine.combineGLs). To fix that, combineGLs has been substituted by combineGLsPrecise, which uses the log-sum-exp trick.

  In just a few cases this change results in genotype changes in integration tests, but after double-checking using unit tests and the difference between combineGLs and combineGLsPrecise in the affected integration tests, the previous GT calls were either border-line cases or due to the underflow.
This commit is contained in:
Valentin Ruano-Rubio 2013-12-19 16:37:47 -05:00 committed by Eric Banks
parent 383a4f4a70
commit 89c4e57478
19 changed files with 762 additions and 246 deletions

View File

@ -422,14 +422,14 @@ public class UnifiedGenotyperEngine {
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
} }
AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model)); final AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
// is the most likely frequency conformation AC=0 for all alternate alleles? // is the most likely frequency conformation AC=0 for all alternate alleles?
boolean bestGuessIsRef = true; boolean bestGuessIsRef = true;
// determine which alternate alleles have AF>0 // determine which alternate alleles have AF>0
final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size()); final List<Allele> myAlleles = new ArrayList<>(vc.getAlleles().size());
final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size()); final List<Integer> alleleCountsofMLE = new ArrayList<>(vc.getAlleles().size());
myAlleles.add(vc.getReference()); myAlleles.add(vc.getReference());
for ( int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++ ) { for ( int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++ ) {
final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i); final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
@ -439,16 +439,13 @@ public class UnifiedGenotyperEngine {
// Compute if the site is considered polymorphic with sufficient confidence relative to our // Compute if the site is considered polymorphic with sufficient confidence relative to our
// phred-scaled emission QUAL // phred-scaled emission QUAL
final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
final boolean toInclude = isNonRef || alternateAllele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE ||
UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
UAC.annotateAllSitesWithPLs;
// if the most likely AC is not 0, then this is a good alternate allele to use bestGuessIsRef &= !isNonRef;
if ( isNonRef ) {
myAlleles.add(alternateAllele); if (toInclude) {
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
bestGuessIsRef = false;
}
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
UAC.annotateAllSitesWithPLs) {
myAlleles.add(alternateAllele); myAlleles.add(alternateAllele);
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele)); alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
} }
@ -513,29 +510,29 @@ public class UnifiedGenotyperEngine {
// the overall lod // the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0]; //double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
List<Allele> allAllelesToUse = builder.make().getAlleles(); final List<Allele> allAllelesToUse = builder.make().getAlleles();
// the forward lod // the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); final VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model)); final AFCalcResult forwardAFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0(); final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod // the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); final VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model)); final AFCalcResult reverseAFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0(); final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0(); final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF; final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); //if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
// strand score is max bias between forward and reverse strands // strand score is max bias between forward and reverse strands

View File

@ -48,7 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -67,10 +70,10 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
final int numChr = 2*numSamples; final int numChr = 2*numSamples;
// queue of AC conformations to process // queue of AC conformations to process
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>(); final LinkedList<ExactACset> ACqueue = new LinkedList<>();
// mapping of ExactACset indexes to the objects // mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1); final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>(numChr+1);
// add AC=0 to the queue // add AC=0 to the queue
final int[] zeroCounts = new int[numAlternateAlleles]; final int[] zeroCounts = new int[numAlternateAlleles];
@ -84,7 +87,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset set = ACqueue.remove(); final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors); calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
// clean up memory // clean up memory
indexesToACset.remove(set.getACcounts()); indexesToACset.remove(set.getACcounts());
@ -95,58 +98,28 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
return getResultFromFinalState(vc, log10AlleleFrequencyPriors); return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
} }
@Override
protected VariantContext reduceScope(final VariantContext vc) {
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles() ) {
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
VariantContextBuilder builder = new VariantContextBuilder(vc); @Override
List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1); protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
alleles.add(vc.getReference()); return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
builder.alleles(alleles);
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL));
return builder.make();
} else {
return vc;
}
} }
private static final int PL_INDEX_OF_HOM_REF = 0; @Override
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), true); final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), true);
for ( final double[] likelihoods : GLs ) { for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) { if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL); final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
final int alleleLikelihoodIndex1 = alleles.alleleIndex1 - 1;
final int alleleLikelihoodIndex2 = alleles.alleleIndex2 - 1;
if ( alleles.alleleIndex1 != 0 ) if ( alleles.alleleIndex1 != 0 )
likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; likelihoodSums[alleleLikelihoodIndex1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
// don't double-count it // don't double-count it
if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 ) if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 )
likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; likelihoodSums[alleleLikelihoodIndex2].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
} }
} }
// sort them by probability mass and choose the best ones
Collections.sort(Arrays.asList(likelihoodSums));
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( int i = 0; i < numAllelesToChoose; i++ )
bestAlleles.add(likelihoodSums[i].allele);
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( bestAlleles.contains(allele) )
orderedBestAlleles.add(allele);
}
return orderedBestAlleles;
} }
private static final class DependentSet { private static final class DependentSet {
@ -199,8 +172,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
if ( ACwiggle > 1 ) { if ( ACwiggle > 1 ) {
final ArrayList<DependentSet> differentAlleles = new ArrayList<DependentSet>(numAltAlleles * numAltAlleles); final ArrayList<DependentSet> differentAlleles = new ArrayList<>(numAltAlleles * numAltAlleles);
final ArrayList<DependentSet> sameAlleles = new ArrayList<DependentSet>(numAltAlleles); final ArrayList<DependentSet> sameAlleles = new ArrayList<>(numAltAlleles);
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {

View File

@ -48,17 +48,50 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import java.util.ArrayList; import java.util.*;
/** /**
* Uses the Exact calculation of Heng Li * Uses the Exact calculation of Heng Li
*/ */
abstract class ExactAFCalc extends AFCalc { abstract class ExactAFCalc extends AFCalc {
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
/**
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@Override
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
return - Double.compare(o1.sum,o2.sum);
}
};
/**
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that
* NON_REF alleles are place are last.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@Override
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
if (o1.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)
return 1;
else if (o2.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)
return -1;
else
return o1.compareTo(o2);
}
};
/**
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
* the likelihood sum.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_INDEX_COMPARATOR = new Comparator<LikelihoodSum>() {
@Override
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
return Integer.compare(o1.index, o2.index);
}
};
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) { protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy); super(nSamples, maxAltAlleles, ploidy);
@ -69,9 +102,10 @@ abstract class ExactAFCalc extends AFCalc {
*/ */
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> { protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
public double sum = 0.0; public double sum = 0.0;
public Allele allele; public final Allele allele;
public final int index;
public LikelihoodSum(Allele allele) { this.allele = allele; } public LikelihoodSum(final Allele allele, final int index) { this.allele = allele; this.index = index; }
public int compareTo(LikelihoodSum other) { public int compareTo(LikelihoodSum other) {
final double diff = sum - other.sum; final double diff = sum - other.sum;
@ -85,12 +119,12 @@ abstract class ExactAFCalc extends AFCalc {
* @return ArrayList of doubles corresponding to GL vectors * @return ArrayList of doubles corresponding to GL vectors
*/ */
protected static ArrayList<double[]> getGLs(final GenotypesContext GLs, final boolean includeDummy) { protected static ArrayList<double[]> getGLs(final GenotypesContext GLs, final boolean includeDummy) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size() + 1); final ArrayList<double[]> genotypeLikelihoods = new ArrayList<>(GLs.size() + 1);
if ( includeDummy ) genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy if ( includeDummy ) genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) { for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector(); final double[] gls = sample.getLikelihoods().getAsVector();
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls); genotypeLikelihoods.add(gls);
@ -99,4 +133,108 @@ abstract class ExactAFCalc extends AFCalc {
return genotypeLikelihoods; return genotypeLikelihoods;
} }
/**
 * Reduces the set of alternate alleles of the variant context to at most {@code getMaxAltAlleles()}
 * (the &lt;NON_REF&gt; allele, if present, is preserved on top of that limit by
 * {@code reduceScopeAlleles}), updating the genotypes accordingly.
 *
 * @param vc the variant context to (possibly) reduce; never {@code null}.
 * @return {@code vc} itself when no reduction is needed, otherwise a new context
 *         restricted to the kept alleles.
 * @throws IllegalStateException if allele reduction paradoxically increased the
 *         number of alternate alleles (indicates a bug in {@code reduceScopeAlleles}).
 */
@Override
protected VariantContext reduceScope(final VariantContext vc) {
    // don't try to genotype too many alternate alleles
    final List<Allele> inputAltAlleles = vc.getAlternateAlleles();
    final List<Allele> outputAltAlleles = reduceScopeAlleles(vc, getMaxAltAlleles());

    // only if the output allele set has shrunk from the input alt allele set do we need to rebuild the context.
    final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size();
    // sanity-check before doing any work: a reduction can never add alleles.
    if (altAlleleReduction < 0)
        throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc);
    if (altAlleleReduction == 0)
        return vc;

    logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles()
            + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart()
            + " has " + (vc.getAlternateAlleles().size())
            + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
    // reuse the already-computed reduced allele list rather than recomputing it.
    final List<Allele> alleles = new ArrayList<>(outputAltAlleles.size() + 1);
    alleles.add(vc.getReference());
    alleles.addAll(outputAltAlleles);
    final VariantContextBuilder builder = new VariantContextBuilder(vc);
    builder.alleles(alleles);
    builder.genotypes(reduceScopeGenotypes(vc, alleles));
    return builder.make();
}
/**
 * Returns the new (possibly reduced) set of alternate alleles to use.
 *
 * <p>The &lt;NON_REF&gt; symbolic allele, when present, is always kept and does not
 * count against {@code numAllelesToChoose}; the remaining alternate alleles are ranked
 * by their likelihood sums and the best ones kept, preserving original allele order.</p>
 *
 * @param vc target variant context.
 * @param numAllelesToChoose maximum number of proper (non-&lt;NON_REF&gt;) alternate alleles to keep.
 * @return the list of alternative alleles to keep, in original alt-allele index order.
 */
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) {

    // Look for the <NON_REF> allele to exclude it from the pruning if present.
    final int numOriginalAltAlleles = vc.getAlternateAlleles().size();

    final int nonRefAltAlleleIndex = GATKVariantContextUtils.indexOfAltAllele(vc,
            GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE, false);
    final boolean nonRefAltAllelePresent = nonRefAltAlleleIndex >= 0;

    // <NON_REF> should not be considered in the downsizing, so we need to count it out when
    // considering if alt. allele downsizing is required.
    final int numProperOriginalAltAlleles = numOriginalAltAlleles - (nonRefAltAllelePresent ? 1 : 0);

    // Avoid pointless allele reduction:
    if (numAllelesToChoose >= numProperOriginalAltAlleles)
        return vc.getAlternateAlleles();

    final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
    for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
        final Allele allele = vc.getAlternateAllele(i);
        likelihoodSums[i] = new LikelihoodSum(allele,i);
    }

    // Calculate the allele likelihood sums.
    reduceScopeCalculateLikelihoodSums(vc, likelihoodSums);

    // sort them by probability mass and choose the best ones
    // Make sure that the <NON_REF> allele is last if present.
    Collections.sort(Arrays.asList(likelihoodSums), nonRefAltAllelePresent ? LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR : LIKELIHOOD_SUM_COMPARATOR);

    // We need to return the best likelihood alleles in the original alternative allele index order.
    // This heap will keep track of that index order.
    final PriorityQueue<LikelihoodSum> mostLikelyAllelesHeapByIndex = new PriorityQueue<>(numOriginalAltAlleles, LIKELIHOOD_INDEX_COMPARATOR);

    for ( int i = 0; i < numAllelesToChoose; i++ )
        mostLikelyAllelesHeapByIndex.add(likelihoodSums[i]);

    // <NON_REF> is guaranteed NOT to have been added above (it sorts last), so add it
    // explicitly here; the early-return above ensures reduction was actually needed.
    if (nonRefAltAllelePresent)
        mostLikelyAllelesHeapByIndex.add(likelihoodSums[nonRefAltAlleleIndex]);

    final ArrayList<Allele> orderedBestAlleles = new ArrayList<>(numAllelesToChoose);

    // drain the heap: elements come out in ascending original alt-allele index order.
    while (!mostLikelyAllelesHeapByIndex.isEmpty())
        orderedBestAlleles.add(mostLikelyAllelesHeapByIndex.remove().allele);

    return orderedBestAlleles;
}
// PL arrays always list the hom-ref genotype first (index 0).
protected static final int PL_INDEX_OF_HOM_REF = 0;

/**
 * Updates the likelihood sums using the variant context's genotype likelihoods.
 *
 * @param vc source variant context.
 * @param likelihoodSums where to accumulate the likelihood sums, one entry per alternate allele.
 */
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums);

/**
 * Transforms the genotypes of the variant context according to the new subset of possible alleles.
 *
 * @param vc original variant-context.
 * @param allelesToUse possible alleles.
 * @return never {@code null}, the new set of genotype calls for the reduced scope.
 */
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse);
} }

View File

@ -59,7 +59,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
private final int ploidy; private final int ploidy;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false; private final static boolean VERBOSE = false;
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) { protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
@ -68,22 +68,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
} }
@Override @Override
protected VariantContext reduceScope(VariantContext vc) { protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
// don't try to genotype too many alternate alleles return subsetAlleles(vc,allelesToUse,false,ploidy);
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles()) {
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
final List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles(), ploidy));
VariantContextBuilder builder = new VariantContextBuilder(vc);
builder.alleles(alleles);
builder.genotypes(subsetAlleles(vc, alleles, false, ploidy));
return builder.make();
} else {
return vc;
}
} }
@Override @Override
@ -105,8 +91,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
public CombinedPoolLikelihoods() { public CombinedPoolLikelihoods() {
// final int numElements = GenotypeLikelihoods.numLikelihoods(); // final int numElements = GenotypeLikelihoods.numLikelihoods();
alleleCountSetList = new LinkedList<ExactACset>(); alleleCountSetList = new LinkedList<>();
conformationMap = new HashMap<ExactACcounts,ExactACset>(); conformationMap = new HashMap<>();
maxLikelihood = Double.NEGATIVE_INFINITY; maxLikelihood = Double.NEGATIVE_INFINITY;
} }
@ -136,54 +122,22 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
public int getLength() { public int getLength() {
return alleleCountSetList.size(); return alleleCountSetList.size();
} }
} }
/** @Override
* protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
* Chooses N most likely alleles in a set of pools (samples) based on GL sum over alt alleles final int numOriginalAltAlleles = likelihoodSums.length;
* @param vc Input variant context
* @param numAllelesToChoose Number of alleles to choose
* @param ploidy Ploidy per pool
* @return list of numAllelesToChoose most likely alleles
*/
private static final int PL_INDEX_OF_HOM_REF = 0;
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose, int ploidy) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), false); final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), false);
for ( final double[] likelihoods : GLs ) { for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL); final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele // by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) { for (int k=1; k < acCount.length;k++)
if (acCount[k] > 0) if (acCount[k] > 0 )
likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]); likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]);
}
} }
// sort them by probability mass and choose the best ones
Collections.sort(Arrays.asList(likelihoodSums));
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( int i = 0; i < numAllelesToChoose; i++ )
bestAlleles.add(likelihoodSums[i].allele);
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( bestAlleles.contains(allele) )
orderedBestAlleles.add(allele);
}
return orderedBestAlleles;
} }
/** /**
* Simple non-optimized version that combines GLs from several pools and produces global AF distribution. * Simple non-optimized version that combines GLs from several pools and produces global AF distribution.
* @param GLs Inputs genotypes context with per-pool GLs * @param GLs Inputs genotypes context with per-pool GLs
@ -231,9 +185,9 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
int newGLPloidy, int newGLPloidy,
int numAlleles, int numAlleles,
final double[] log10AlleleFrequencyPriors) { final double[] log10AlleleFrequencyPriors) {
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>(); final LinkedList<ExactACset> ACqueue = new LinkedList<>();
// mapping of ExactACset indexes to the objects // mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(); final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>();
final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods(); final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods();
// add AC=0 to the queue // add AC=0 to the queue
@ -251,7 +205,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
getStateTracker().incNEvaluations(); getStateTracker().incNEvaluations();
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset ACset = ACqueue.remove(); final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
// clean up memory // clean up memory
indexesToACset.remove(ACset.getACcounts()); indexesToACset.remove(ACset.getACcounts());
@ -514,7 +469,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final int ploidy) { final int ploidy) {
// the genotypes with PLs // the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes(); final GenotypesContext oldGTs = vc.getGenotypes();
List<Allele> NO_CALL_ALLELES = new ArrayList<Allele>(ploidy); List<Allele> NO_CALL_ALLELES = new ArrayList<>(ploidy);
for (int k=0; k < ploidy; k++) for (int k=0; k < ploidy; k++)
NO_CALL_ALLELES.add(Allele.NO_CALL); NO_CALL_ALLELES.add(Allele.NO_CALL);
@ -584,8 +539,6 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* @param newLikelihoods the PL array * @param newLikelihoods the PL array
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs) * @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
* @param numChromosomes Number of chromosomes per pool * @param numChromosomes Number of chromosomes per pool
*
* @return genotype
*/ */
private void assignGenotype(final GenotypeBuilder gb, private void assignGenotype(final GenotypeBuilder gb,
final double[] newLikelihoods, final double[] newLikelihoods,
@ -599,8 +552,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex); final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>(); final ArrayList<Double> alleleFreqs = new ArrayList<>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>(); final ArrayList<Integer> alleleCounts = new ArrayList<>();
for (int k=1; k < mlAlleleCount.length; k++) { for (int k=1; k < mlAlleleCount.length; k++) {
@ -629,6 +582,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
} }
gb.alleles(myAlleles); gb.alleles(myAlleles);
// TODO - deprecated so what is the appropriate method to call?
if ( numNewAltAlleles > 0 ) if ( numNewAltAlleles > 0 )
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
} }

View File

@ -107,6 +107,7 @@ import java.util.*;
* prior for the ith least likely allele. * prior for the ith least likely allele.
*/ */
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc { public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
/** /**
* The min. confidence of an allele to be included in the joint posterior. * The min. confidence of an allele to be included in the joint posterior.
*/ */
@ -249,7 +250,7 @@ import java.util.*;
final int nAlts = rootVC.getNAlleles() - 1; final int nAlts = rootVC.getNAlleles() - 1;
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples()); final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
for ( final Genotype g : rootVC.getGenotypes() ) for ( final Genotype g : rootVC.getGenotypes() )
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts)); biallelicGenotypes.add(combineGLsPrecise(g, altAlleleIndex, nAlts));
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC); final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1); final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
@ -281,6 +282,7 @@ import java.util.*;
*/ */
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"}) @Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"}) @Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
@Deprecated
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) { protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
if ( original.isNonInformative() ) if ( original.isNonInformative() )
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make(); return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
@ -316,6 +318,75 @@ import java.util.*;
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make(); return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
} }
private static final double PHRED_2_LOG10_COEFF = -.1;
/**
 * Reduces the PLs of a multi-allelic genotype to the bi-allelic case X/B in a
 * numerically stable way.
 *
 * <p>Given GLs over alleles A/B/C at a site, the bi-allelic GLs for X/B (where X stands
 * for "anything that is not B") are obtained by marginalizing:</p>
 *
 * <p>XX = AA + AC + CC<br/>
 * XB = AB + BC<br/>
 * BB = BB</p>
 *
 * <p>The sums are performed in log10 space via the log-sum-exp trick
 * ({@link MathUtils#log10sumLog10}) so that very small likelihoods do not underflow,
 * unlike the deprecated {@link #combineGLs}.</p>
 *
 * @param original the original multi-allelic genotype
 * @param altIndex the index of the alt allele to keep in the bi-allelic case -- with ref == 0
 * @param nAlts    the total number of alt alleles
 * @return a new bi-allelic genotype with appropriate PLs
 */
@Requires({"original.hasLikelihoods()"})
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
protected Genotype combineGLsPrecise(final Genotype original, final int altIndex, final int nAlts ) {
    // Non-informative genotypes stay non-informative in the bi-allelic reduction.
    if ( original.isNonInformative() )
        return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();

    if ( altIndex < 1 || altIndex > nAlts )
        throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);

    final int[] originalPLs = original.getPL();
    final int alleleCount = nAlts + 1;
    final int genotypeCount = originalPLs.length;

    // log10-likelihood buckets: hom-alt (BB), het involving B (XB), everything without B (XX).
    double homAltLog10 = 0;
    final double[] hetLog10s = new double[alleleCount - 1];
    final double[] nonAltLog10s = new double[genotypeCount - alleleCount];
    int hetCount = 0;
    int nonAltCount = 0;

    for ( int gt = 0; gt < genotypeCount; gt++ ) {
        final double log10Value = PHRED_2_LOG10_COEFF * originalPLs[gt];   // phred -> log10
        final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(gt);
        final boolean firstIsAlt = pair.alleleIndex1 == altIndex;
        final boolean secondIsAlt = pair.alleleIndex2 == altIndex;
        if ( firstIsAlt && secondIsAlt )
            homAltLog10 = log10Value;                                      // BB
        else if ( firstIsAlt || secondIsAlt )
            hetLog10s[hetCount++] = log10Value;                            // XB
        else
            nonAltLog10s[nonAltCount++] = log10Value;                      // XX
    }

    // Marginalize each bucket with the log-sum-exp trick to avoid underflow.
    final double[] biallelicGLs = { MathUtils.log10sumLog10(nonAltLog10s),
                                    MathUtils.log10sumLog10(hetLog10s),
                                    homAltLog10 };
    return new GenotypeBuilder(original).PL(biallelicGLs).alleles(BIALLELIC_NOCALL).make();
}
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) { protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults); final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
@ -340,7 +411,6 @@ import java.util.*;
return sorted; return sorted;
} }
/** /**
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result * Take the independent estimates of pNonRef for each alt allele and combine them into a single result
* *

View File

@ -57,8 +57,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.sting.utils.collections.DefaultHashMap;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.EventMap; import org.broadinstitute.sting.utils.haplotype.EventMap;
import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -136,7 +134,10 @@ public class GenotypingEngine {
* @param activeRegionWindow Active window * @param activeRegionWindow Active window
* @param genomeLocParser GenomeLocParser * @param genomeLocParser GenomeLocParser
* @param activeAllelesToGenotype Alleles to genotype * @param activeAllelesToGenotype Alleles to genotype
* @param addNonRef whether we should add a &lt;NON_REF&gt; alternative allele to the result variation contexts.
*
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes * @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
*
*/ */
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"}) @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
@Ensures("result != null") @Ensures("result != null")
@ -150,7 +151,8 @@ public class GenotypingEngine {
final GenomeLoc activeRegionWindow, final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser, final GenomeLocParser genomeLocParser,
final RefMetaDataTracker tracker, final RefMetaDataTracker tracker,
final List<VariantContext> activeAllelesToGenotype ) { final List<VariantContext> activeAllelesToGenotype,
final boolean addNonRef) {
// sanity check input arguments // sanity check input arguments
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine); if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes); if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
@ -183,16 +185,27 @@ public class GenotypingEngine {
final List<String> priorityList = makePriorityList(eventsAtThisLoc); final List<String> priorityList = makePriorityList(eventsAtThisLoc);
// Merge the event to find a common reference representation // Merge the event to find a common reference representation
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
if( mergedVC == null ) { continue; } if( mergedVC == null ) { continue; }
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
// this is possible in GGA mode when the same event is represented in multiple input records ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
throw new UserException("The same event (although possibly represented differently) is present in multiple input records at location " + loc + " and this is not something we can handle at this time. You will need to remove one of the records in order to proceed with your input file(s).");
if (addNonRef) {
final List<Allele> alleleList = new ArrayList<>();
alleleList.addAll(mergedVC.getAlleles());
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
vcb.alleles(alleleList);
mergedVC = vcb.make();
} }
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>(); final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) { for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
} }
@ -204,11 +217,14 @@ public class GenotypingEngine {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() ); final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() );
if (addNonRef) addMiscellaneousAllele(alleleReadMap);
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL); final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) { if( call != null ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) ); convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
if (addNonRef) addMiscellaneousAllele(alleleReadMap_annotations);
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call); VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
@ -218,16 +234,46 @@ public class GenotypingEngine {
} }
// maintain the set of all called haplotypes // maintain the set of all called haplotypes
for ( final Allele calledAllele : call.getAlleles() ) for ( final Allele calledAllele : call.getAlleles() ) {
calledHaplotypes.addAll(alleleMapper.get(calledAllele)); final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
if (haplotypeList == null) continue;
calledHaplotypes.addAll(haplotypeList);
}
returnCalls.add( annotatedCall ); returnCalls.add( annotatedCall );
} }
} }
} }
return new CalledHaplotypes(returnCalls, calledHaplotypes); return new CalledHaplotypes(returnCalls, calledHaplotypes);
} }
/**
 * Adds the symbolic &lt;NON_REF&gt; allele to every read in a per-sample read-likelihood map.
 *
 * <p>For each read, the likelihood assigned to &lt;NON_REF&gt; is the second best likelihood
 * amongst the alleles already reported for that read -- i.e. the best likelihood an unseen
 * alternative allele could possibly attain. When the read has a likelihood for a single
 * allele only (no runner-up exists), the best likelihood itself is used.</p>
 *
 * @param stratifiedReadMap target per-sample, per-read allele-likelihood map; modified in place.
 * @return the same {@code stratifiedReadMap} instance, for convenience.
 */
public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
    final Allele miscellaneousAllele = GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
    for (final Map.Entry<String, PerReadAlleleLikelihoodMap> perSample : stratifiedReadMap.entrySet()) {
        for (final Map.Entry<GATKSAMRecord, Map<Allele, Double>> perRead : perSample.getValue().getLikelihoodReadMap().entrySet()) {
            double bestLikelihood = Double.NEGATIVE_INFINITY;
            double secondBestLikelihood = Double.NEGATIVE_INFINITY;
            for (final Map.Entry<Allele, Double> perAllele : perRead.getValue().entrySet()) {
                final double value = perAllele.getValue();
                if (value > bestLikelihood) {
                    secondBestLikelihood = bestLikelihood;
                    bestLikelihood = value;
                } else if (value > secondBestLikelihood) {
                    // Fix: also accept values tied with the current best. The previous
                    // 'value < bestLikelihood && value > secondBestLikelihood' guard
                    // silently skipped ties, so a read with likelihoods {b, b, m} would
                    // report m as the runner-up instead of b.
                    secondBestLikelihood = value;
                }
            }
            // Single-allele reads have no runner-up; fall back to the best likelihood.
            final double miscellaneousLikelihood = Double.isInfinite(secondBestLikelihood) ? bestLikelihood : secondBestLikelihood;
            perSample.getValue().add(perRead.getKey(), miscellaneousAllele, miscellaneousLikelihood);
        }
    }
    return stratifiedReadMap;
}
/** /**
* Go through the haplotypes we assembled, and decompose them into their constituent variant contexts * Go through the haplotypes we assembled, and decompose them into their constituent variant contexts
* *

View File

@ -564,6 +564,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
public void initialize() { public void initialize() {
super.initialize(); super.initialize();
if (dontGenotype && emitReferenceConfidence == ReferenceConfidenceMode.GVCF)
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel); throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel);
@ -615,24 +624,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// where the filters are used. For example, in emitting all sites the lowQual field is used // where the filters are used. For example, in emitting all sites the lowQual field is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality")); headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel); initializeReferenceConfidenceModel(samples, headerInfo);
if ( emitReferenceConfidence() ) {
if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
// a kluge to enforce the use of this indexing strategy
if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
}
try {
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
} catch ( IllegalArgumentException e ) {
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
}
}
}
vcfWriter.writeHeader(new VCFHeader(headerInfo, samples)); vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
@ -688,6 +680,28 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
getToolkit().getGenomeLocParser()); getToolkit().getGenomeLocParser());
} }
/**
 * Sets up the reference-confidence machinery used by -ERC modes.
 *
 * <p>Always builds the {@code referenceConfidenceModel}. When reference-confidence emission is
 * requested it additionally: enforces single-sample input, contributes the model's VCF header
 * lines, and -- for full GVCF mode -- validates the required index strategy, forces the emit
 * confidence to 0 and wraps the output writer in a {@link GVCFWriter}.</p>
 *
 * @param samples    the set of sample names in the input; GVCF emission requires exactly one.
 * @param headerInfo VCF header lines being assembled for output; extended in place.
 */
private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
    referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
    if ( emitReferenceConfidence() ) {
        // Reference confidence is currently a single-sample feature only.
        if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
        headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
        if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
            // a kluge to enforce the use of this indexing strategy
            if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
                    getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
                throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
            }
            // gVCF must emit every site regardless of confidence.
            // NOTE(review): initialize() also forces both emitting and calling confidence
            // to 0 for GVCF mode, so this assignment appears redundant -- confirm intent.
            SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = 0.0;
            try {
                // Wrap the plain writer so hom-ref sites are banded into gVCF blocks.
                vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
            } catch ( IllegalArgumentException e ) {
                throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
            }
        }
    }
}
/** /**
* Instantiates the appropriate likelihood calculation engine. * Instantiates the appropriate likelihood calculation engine.
* *
@ -833,10 +847,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap =
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads); likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there // Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
// was a bad interaction between that selection and the marginalization that happens over each event when computing // was a bad interaction between that selection and the marginalization that happens over each event when computing
// GLs. In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the // GLs. In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the
@ -852,7 +862,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
regionForGenotyping.getLocation(), regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(), getToolkit().getGenomeLocParser(),
metaDataTracker, metaDataTracker,
activeAllelesToGenotype ); activeAllelesToGenotype, emitReferenceConfidence() );
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted // TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) { if ( bamWriter != null ) {
@ -866,8 +876,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if( DEBUG ) { logger.info("----------------------------------------------------------------------------------"); } if( DEBUG ) { logger.info("----------------------------------------------------------------------------------"); }
if ( emitReferenceConfidence() ) { if ( emitReferenceConfidence() ) {
if ( calledHaplotypes.getCalls().isEmpty() ) { if ( !containsCalls(calledHaplotypes) ) {
// no called all of the potential haplotypes // no called all of the potential haplotypes
return referenceModelForNoVariation(originalActiveRegion, false); return referenceModelForNoVariation(originalActiveRegion, false);
} else } else
@ -879,6 +890,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
} }
} }
/**
 * Determines whether the genotyping result contains at least one genuinely called genotype.
 *
 * <p>Used in reference-confidence mode to decide between emitting real calls and falling
 * back to the no-variation reference model for the region.</p>
 *
 * @param calledHaplotypes the genotyping result for an active region.
 * @return {@code true} iff some genotype in some returned variant context is called.
 */
private boolean containsCalls(final GenotypingEngine.CalledHaplotypes calledHaplotypes) {
    // An empty call list trivially contains no calls; otherwise scan every genotype.
    for ( final VariantContext call : calledHaplotypes.getCalls() ) {
        for ( final Genotype genotype : call.getGenotypes() ) {
            if ( genotype.isCalled() ) {
                return true;
            }
        }
    }
    return false;
}
/** /**
* High-level function that runs the assembler on the active region reads, * High-level function that runs the assembler on the active region reads,
* returning a data structure with the resulting information needed * returning a data structure with the resulting information needed

View File

@ -48,7 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -63,9 +66,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFFormatHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFSimpleHeaderLine; import org.broadinstitute.variant.vcf.VCFSimpleHeaderLine;
import java.io.File; import java.io.File;

View File

@ -46,8 +46,6 @@
package org.broadinstitute.sting.utils.gvcf; package org.broadinstitute.sting.utils.gvcf;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.ReferenceConfidenceModel;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder; import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
@ -56,7 +54,10 @@ import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import java.util.*; import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
/** /**
* Genome-wide VCF writer * Genome-wide VCF writer
@ -287,7 +288,7 @@ public class GVCFWriter implements VariantContextWriter {
} }
final Genotype g = vc.getGenotype(0); final Genotype g = vc.getGenotype(0);
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) ) { if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) && vc.isBiallelic() ) {
// create bands // create bands
final VariantContext maybeCompletedBand = addHomRefSite(vc, g); final VariantContext maybeCompletedBand = addHomRefSite(vc, g);
if ( maybeCompletedBand != null ) underlyingWriter.add(maybeCompletedBand); if ( maybeCompletedBand != null ) underlyingWriter.add(maybeCompletedBand);

View File

@ -118,7 +118,6 @@ final class HomRefBlock {
if ( g == null ) throw new IllegalArgumentException("g cannot be null"); if ( g == null ) throw new IllegalArgumentException("g cannot be null");
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field"); if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field"); if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
if ( ! g.hasDP() ) throw new IllegalArgumentException("g must have DP field");
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop); if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
@ -136,7 +135,7 @@ final class HomRefBlock {
} }
stop = pos; stop = pos;
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
DPs.add(g.getDP()); DPs.add(Math.max(g.getDP(),0));
} }
/** /**

View File

@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() { public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("e7cb959912ea964bf9c897904aa5220b")); Arrays.asList("f5a62ecb8d32f6161b2ac7682c9f711d"));
executeTest("test Multiple SNP alleles", spec); executeTest("test Multiple SNP alleles", spec);
} }
@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMismatchedPLs() { public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("f29b3fa9d5642297cfc4b10aa2137c68")); Arrays.asList("8897652c7516a91d22bc678f2189131e"));
executeTest("test mismatched PLs", spec); executeTest("test mismatched PLs", spec);
} }
} }

View File

@ -106,14 +106,22 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
} }
@Test(enabled = true, dataProvider = "TestCombineGLs") @Test(enabled = true, dataProvider = "TestCombineGLs")
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) { public void testCombineGLsPrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4); final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts); final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(), Assert.assertEquals(combined.getPL(), expected.getPL(),
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL())); "Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
} }
// NOTE(review): this test is an exact duplicate of testCombineGLsPrecise above —
// same data provider, same calculator construction, same combineGLsPrecise call and
// same assertion. One of the two should probably be removed (or this one should be
// pointed at a different method under test).
@Test(enabled = true, dataProvider = "TestCombineGLs")
public void testCombinePrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(),
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
}
static Allele A = Allele.create("A", true); static Allele A = Allele.create("A", true);
static Allele C = Allele.create("C"); static Allele C = Allele.create("C");

View File

@ -57,9 +57,13 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.haplotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment; import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
@ -279,6 +283,60 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap)); Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap));
} }
// Tests that adding the miscellaneous <NON_REF> allele to a variant context updates the
// genotype likelihoods as expected and introduces the symbolic allele.
// NOTE(review): deliberately disabled (enabled=false): the call to
// GenotypingEngine.addMiscellaneousAllele is commented out below, so updatedVc is null
// and every subsequent dereference would throw a NullPointerException if re-enabled.
// NOTE(review): the data provider rows carry 9 entries but this method declares 8
// parameters — the provider must be reconciled before enabling (see
// ADD_MISCELLANEOUS_ALLELE_DATA).
@Test(dataProvider="AddMiscellaneousDataProvider", enabled=false)
public void testAddMiscellaneousAllele(final String readBases, final int readOffset,
final String ref, final int refOffset,
final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
// Build a single artificial read covering the site with uniform base quality 30.
final byte baseQual = (byte)30;
final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
final GenomeLoc loc = new UnvalidatingGenomeLoc("20",0,refOffset,refOffset);
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc,Collections.singletonList(read),readOffset);
// Assemble a one-sample variant context with the supplied alleles and likelihoods.
final VariantContextBuilder vcb = new VariantContextBuilder();
final GenotypeBuilder gb = new GenotypeBuilder();
final List<String> alleleStrings = new ArrayList<>( 1 + alternatives.length);
alleleStrings.add(referenceAllele);
alleleStrings.addAll(Arrays.asList(alternatives));
gb.AD(new int[] { 1 });
gb.DP(1);
gb.PL(likelihoods);
vcb.alleles(alleleStrings);
vcb.loc("20",refOffset,refOffset + referenceAllele.length() -1);
vcb.genotypes(gb.make());
final VariantContext vc = vcb.make();
// NOTE(review): null placeholder — the method under test is commented out, so the
// assertions below can never run; restore the call before enabling this test.
final VariantContext updatedVc = null; // GenotypingEngine.addMiscellaneousAllele(vc,pileup,ref.getBytes(),0);
final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
Assert.assertEquals(updatedLikelihoodsArray[i],expected[i],0.0001);
}
// The update is expected to add exactly one symbolic <NON_REF> alternative allele.
Allele altAllele = null;
for (final Allele allele : updatedVc.getAlleles())
if (allele.isSymbolic() && allele.getBaseString().equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME))
altAllele = allele;
Assert.assertNotNull(altAllele);
}
// Supplies the cases for the (currently disabled) testAddMiscellaneousAllele test.
@DataProvider(name="AddMiscellaneousDataProvider")
public Iterator<Object[]> addMiscellaneousAlleleDataProvider() {
return Arrays.asList(ADD_MISCELLANEOUS_ALLELE_DATA).iterator();
}
// log10 probability of a correct / incorrect base call at Q30.
private static final double MATCH_LnLK = QualityUtils.qualToProbLog10((byte)30);
private static final double MISS_LnLK = QualityUtils.qualToErrorProbLog10((byte)30);
// NOTE(review): each row has 9 entries (readBases, readOffset, ref, refOffset,
// referenceAllele, alternatives, likelihoods, 6, expected) but the test method takes
// only 8 parameters — the stray "6" would make TestNG fail if the test were enabled.
// NOTE(review): MATCH_LnLK / MISS_LnLK are log10 values; multiplying them
// (MATCH_LnLK * MATCH_LnLK) is unusual — log-likelihoods are normally combined by
// addition. Presumably sums were intended here; confirm before enabling the test.
private static final Object[][] ADD_MISCELLANEOUS_ALLELE_DATA = new Object[][] {
new Object[] {"ACTG", 0,"ACTGTGAGTATTCC",0,"A",new String[]{}, new double[] {MATCH_LnLK * MATCH_LnLK}, 6,
new double[] {MATCH_LnLK * MATCH_LnLK,MATCH_LnLK * MISS_LnLK, MISS_LnLK * MISS_LnLK}}
};
/** /**
* Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext * Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext
*/ */

View File

@ -66,11 +66,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data // this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4bb8e44b2d04757f8b11ca6400828357"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "1dfe1fc8079938adf1565450671094d4"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "8ddc291584f56e27d125b6a0523f2703"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d98245380500a6decfc26dcaadb2c4d2"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "e53e164cc3f5cbd5fba083f2cdb98a88"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "c4c28e74eda133f99e0864ad16c965c4"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "a258dbbfabe88dad11d57151cd256831"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "e4d36f165b2ddbb923d3c9a402e96f1b"});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }

View File

@ -48,10 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -61,10 +58,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.variant.variantcontext.GenotypeType;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;

View File

@ -492,13 +492,62 @@ public class MathUtils {
return result; return result;
} }
// Switch-over point between the log1p and expm1 formulations of log(1 - exp(a)).
private static final double LOG1MEXP_THRESHOLD = Math.log(0.5);
// Natural log of 10, used to move between log10 and natural-log space.
private static final double LN_10 = Math.log(10);

/**
 * Calculates {@code log(1-exp(a))} without losing precision.
 *
 * <p>
 * This is based on the approach described in:
 * </p>
 * <p>
 * Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012 <br/>
 * <a href="http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf">Online document</a>.
 * </p>
 *
 * @param a the input exponent.
 * @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
 */
public static double log1mexp(final double a) {
    if (a > 0) {
        return Double.NaN;                     // 1 - exp(a) < 0: log is undefined
    } else if (a == 0) {
        return Double.NEGATIVE_INFINITY;       // log(1 - 1) = log(0)
    } else if (a < LOG1MEXP_THRESHOLD) {
        return Math.log1p(-Math.exp(a));       // exp(a) small: log1p preserves precision
    } else {
        return Math.log(-Math.expm1(a));       // exp(a) near 1: expm1 preserves precision
    }
}

/**
 * Calculates {@code log10(1-10^a)} without losing precision.
 *
 * <p>
 * This is based on the approach described in:
 * </p>
 * <p>
 * Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012 <br/>
 * <a href="http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf">Online document</a>.
 * </p>
 *
 * @param a the input exponent.
 * @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
 */
public static double log10OneMinusPow10(final double a) {
    // Translate the problem into natural-log space, delegate to log1mexp (which also
    // handles the a > 0 and a == 0 edge cases since the sign of a*ln(10) matches the
    // sign of a), and translate the result back to log10.
    final double exponentInNats = a * LN_10;
    return log1mexp(exponentInNats) / LN_10;
}
/** /**
* Calculates the log10 of the multinomial coefficient. Designed to prevent * Calculates the log10 of the multinomial coefficient. Designed to prevent
* overflows even with very large numbers. * overflows even with very large numbers.
* *
* @param n total number of trials * @param n total number of trials
* @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km) * @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km)
* @return * @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
*/ */
public static double log10MultinomialCoefficient(final int n, final int[] k) { public static double log10MultinomialCoefficient(final int n, final int[] k) {
if ( n < 0 ) if ( n < 0 )

View File

@ -2115,4 +2115,79 @@ public class GATKVariantContextUtils {
if ( ! vc1.getAlleles().equals(vc2.getAlleles()) ) return false; if ( ! vc1.getAlleles().equals(vc2.getAlleles()) ) return false;
return true; return true;
} }
/**
 * Returns the absolute 0-based index of an allele.
 *
 * <p/>
 * If the allele is equal to the reference the result is 0, if it is equal to the first alternative
 * the result is 1 and so forth.
 * <p/>
 * Therefore if you want the 0-based index within the alternative alleles only, use
 * {@link #indexOfAltAllele} instead.
 * <p/>
 * You can indicate whether the Java object identity comparator {@code ==} can be safely used by
 * setting {@code useEquals} to {@code false}.
 *
 * @param vc the target variant context.
 * @param allele the target allele.
 * @param ignoreRefState whether the reference state of the allele is important at all. Has no effect if {@code useEquals} is {@code false}.
 * @param considerRefAllele whether the reference allele should be considered. You should set it to {@code false} if you are only interested in alternative alleles.
 * @param useEquals whether the equals method should be used in the search: {@link Allele#equals(Allele,boolean)}.
 *
 * @throws IllegalArgumentException if {@code allele} is {@code null}.
 * @return {@code -1} if there is no allele that satisfies those criteria, otherwise a value between 0 and {@link VariantContext#getNAlleles()} {@code - 1}.
 */
public static int indexOfAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState, final boolean considerRefAllele, final boolean useEquals) {
    if (allele == null) throw new IllegalArgumentException("the allele cannot be null");
    return useEquals ? indexOfEqualAllele(vc,allele,ignoreRefState,considerRefAllele) : indexOfSameAllele(vc,allele,considerRefAllele);
}

/**
 * Returns the relative 0-based index of an alternative allele.
 * <p/>
 * If the query allele is the same as the first alternative allele the result is 0,
 * if it is equal to the second 1 and so forth.
 *
 * <p/>
 * Notice that the ref-status of the query {@code allele} is ignored.
 *
 * @param vc the target variant context.
 * @param allele the query allele.
 * @param useEquals whether the equals method should be used in the search: {@link Allele#equals(Allele,boolean)}.
 *
 * @throws IllegalArgumentException if {@code allele} is {@code null}.
 *
 * @return {@code -1} if there is no allele that satisfies those criteria, otherwise a value between 0 and the number
 * of alternative alleles - 1.
 */
public static int indexOfAltAllele(final VariantContext vc, final Allele allele, final boolean useEquals) {
    final int absoluteIndex = indexOfAllele(vc,allele,true,false,useEquals);
    return absoluteIndex == -1 ? -1 : absoluteIndex - 1;
}

// Implements the index search using Allele.equals(Allele,boolean).
// Fix over the original: when the reference allele (index 0) matches but
// considerRefAllele is false, the scan now skips it and keeps searching the
// alternatives instead of aborting the whole search with -1 on that first match.
private static int indexOfEqualAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState,
                                      final boolean considerRefAllele) {
    int i = 0;
    for (final Allele a : vc.getAlleles()) {
        if (a.equals(allele,ignoreRefState) && (i != 0 || considerRefAllele))
            return i;
        i++;
    }
    return -1;
}

// Implements the index search using object identity (==), with the same
// skip-the-reference semantics as indexOfEqualAllele when considerRefAllele is false.
private static int indexOfSameAllele(final VariantContext vc, final Allele allele, final boolean considerRefAllele) {
    int i = 0;
    for (final Allele a : vc.getAlleles()) {
        if (a == allele && (i != 0 || considerRefAllele))
            return i;
        i++;
    }
    return -1;
}
} }

View File

@ -39,18 +39,19 @@ import java.util.*;
* Basic unit test for MathUtils * Basic unit test for MathUtils
*/ */
public class MathUtilsUnitTest extends BaseTest { public class MathUtilsUnitTest extends BaseTest {
@BeforeClass @BeforeClass
public void init() { public void init() {
} }
/** /**
* Tests that we get unqiue values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}. * Tests that we get unique values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}.
*/ */
@Test @Test
public void testGenerateUniqueHashFromThreePositiveIntegers() { public void testGenerateUniqueHashFromThreePositiveIntegers() {
logger.warn("Executing testGenerateUniqueHashFromThreePositiveIntegers"); logger.warn("Executing testGenerateUniqueHashFromThreePositiveIntegers");
final Set<Long> observedLongs = new HashSet<Long>(); final Set<Long> observedLongs = new HashSet<>();
for (short i = 0; i < Byte.MAX_VALUE; i++) { for (short i = 0; i < Byte.MAX_VALUE; i++) {
for (short j = 0; j < Byte.MAX_VALUE; j++) { for (short j = 0; j < Byte.MAX_VALUE; j++) {
for (short k = 0; k < Byte.MAX_VALUE; k++) { for (short k = 0; k < Byte.MAX_VALUE; k++) {
@ -72,6 +73,80 @@ public class MathUtilsUnitTest extends BaseTest {
} }
} }
// Checks MathUtils.log10OneMinusPow10 against a naively-computed expectation.
// NaN is asserted separately because assertEquals with a delta cannot compare NaNs.
// NOTE(review): for x == 0 the provider yields expected == -Infinity; confirm that
// TestNG's assertEquals(double,double,delta) treats -Infinity == -Infinity as equal
// rather than computing an Infinity - Infinity (NaN) difference.
@Test(dataProvider = "log10OneMinusPow10Data")
public void testLog10OneMinusPow10(final double x, final double expected) {
final double actual = MathUtils.log10OneMinusPow10(x);
if (Double.isNaN(expected))
Assert.assertTrue(Double.isNaN(actual));
else
Assert.assertEquals(actual,expected,1E-9);
}
// Checks MathUtils.log1mexp against a naively-computed expectation; same NaN and
// infinity caveats as testLog10OneMinusPow10 above.
@Test(dataProvider = "log1mexpData")
public void testLog1mexp(final double x, final double expected) {
final double actual = MathUtils.log1mexp(x);
if (Double.isNaN(expected))
Assert.assertTrue(Double.isNaN(actual));
else
Assert.assertEquals(actual,expected,1E-9);
}
// Supplies (input, naively-computed expected log10(1 - 10^input)) pairs covering NaN,
// invalid positive inputs, the zero boundary and a wide range of negative exponents.
@DataProvider(name = "log10OneMinusPow10Data")
public Iterator<Object[]> log10OneMinusPow10Data() {
    final double[] inputs = new double[] { Double.NaN, 10, 1, 0, -1, -3, -10, -30, -100, -300, -1000, -3000 };
    final Object[][] cases = new Object[inputs.length][];
    for (int i = 0; i < inputs.length; i++) {
        // The naive formula is accurate enough here to serve as the expectation;
        // the consuming test compares with a 1E-9 delta.
        cases[i] = new Object[] { inputs[i], Math.log10(1 - Math.pow(10, inputs[i])) };
    }
    // Arrays.asList yields a fixed-size list, so the iterator's remove() still throws
    // UnsupportedOperationException, matching the original hand-rolled iterator.
    return Arrays.asList(cases).iterator();
}
// Supplies (input, naively-computed expected log(1 - exp(input))) pairs covering NaN,
// invalid positive inputs, the zero boundary and a wide range of negative exponents.
@DataProvider(name = "log1mexpData")
public Iterator<Object[]> log1mexpData() {
    final double[] inputs = new double[] { Double.NaN, 10, 1, 0, -1, -3, -10, -30, -100, -300, -1000, -3000 };
    final Object[][] cases = new Object[inputs.length][];
    for (int i = 0; i < inputs.length; i++) {
        // Naive expectation; the consuming test compares with a 1E-9 delta.
        cases[i] = new Object[] { inputs[i], Math.log(1 - Math.exp(inputs[i])) };
    }
    // Fixed-size list: iterator's remove() throws UnsupportedOperationException,
    // matching the original hand-rolled iterator.
    return Arrays.asList(cases).iterator();
}
/** /**
* Tests that we get the right values from the binomial distribution * Tests that we get the right values from the binomial distribution
*/ */
@ -144,7 +219,7 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertTrue(MathUtils.sampleIndicesWithReplacement(5, 1000).size() == 1000); Assert.assertTrue(MathUtils.sampleIndicesWithReplacement(5, 1000).size() == 1000);
// Check that the list contains only the k element range that as asked for - no more, no less // Check that the list contains only the k element range that as asked for - no more, no less
List<Integer> Five = new ArrayList<Integer>(); List<Integer> Five = new ArrayList<>();
Collections.addAll(Five, 0, 1, 2, 3, 4); Collections.addAll(Five, 0, 1, 2, 3, 4);
List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000); List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000);
Assert.assertTrue(BigFive.containsAll(Five)); Assert.assertTrue(BigFive.containsAll(Five));
@ -160,9 +235,9 @@ public class MathUtilsUnitTest extends BaseTest {
// Check that the list contains only the k element range that as asked for - no more, no less but now // Check that the list contains only the k element range that as asked for - no more, no less but now
// use the index list to pull elements from another list using sliceListByIndices // use the index list to pull elements from another list using sliceListByIndices
List<Integer> Five = new ArrayList<Integer>(); List<Integer> Five = new ArrayList<>();
Collections.addAll(Five, 0, 1, 2, 3, 4); Collections.addAll(Five, 0, 1, 2, 3, 4);
List<Character> FiveAlpha = new ArrayList<Character>(); List<Character> FiveAlpha = new ArrayList<>();
Collections.addAll(FiveAlpha, 'a', 'b', 'c', 'd', 'e'); Collections.addAll(FiveAlpha, 'a', 'b', 'c', 'd', 'e');
List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000); List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000);
List<Character> BigFiveAlpha = MathUtils.sliceListByIndices(BigFive, FiveAlpha); List<Character> BigFiveAlpha = MathUtils.sliceListByIndices(BigFive, FiveAlpha);
@ -180,8 +255,8 @@ public class MathUtilsUnitTest extends BaseTest {
int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24}; int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
MathUtils.RunningAverage r = new MathUtils.RunningAverage(); MathUtils.RunningAverage r = new MathUtils.RunningAverage();
for (int i = 0; i < numbers.length; i++) for (final double b : numbers)
r.add((double) numbers[i]); r.add(b);
Assert.assertEquals((long) numbers.length, r.observationCount()); Assert.assertEquals((long) numbers.length, r.observationCount());
Assert.assertTrue(r.mean() - 3224.625 < 2e-10); Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
@ -253,24 +328,6 @@ public class MathUtilsUnitTest extends BaseTest {
} }
} }
/**
* Private functions used by testArrayShuffle()
*/
private boolean hasUniqueElements(Object[] x) {
for (int i = 0; i < x.length; i++)
for (int j = i + 1; j < x.length; j++)
if (x[i].equals(x[j]) || x[i] == x[j])
return false;
return true;
}
private boolean hasAllElements(final Object[] expected, final Object[] actual) {
HashSet<Object> set = new HashSet<Object>();
set.addAll(Arrays.asList(expected));
set.removeAll(Arrays.asList(actual));
return set.isEmpty();
}
@Test @Test
public void testApproximateLog10SumLog10() { public void testApproximateLog10SumLog10() {
@ -451,7 +508,7 @@ public class MathUtilsUnitTest extends BaseTest {
@DataProvider(name = "ArrayMinData") @DataProvider(name = "ArrayMinData")
public Object[][] makeArrayMinData() { public Object[][] makeArrayMinData() {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<>();
// this functionality can be adapted to provide input data for whatever you might want in your data // this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{Arrays.asList(10), 10}); tests.add(new Object[]{Arrays.asList(10), 10});
@ -554,9 +611,11 @@ public class MathUtilsUnitTest extends BaseTest {
// generate the partitions of an integer, each partition sorted numerically // generate the partitions of an integer, each partition sorted numerically
int n; int n;
List<Integer> a; List<Integer> a;
int y; int y;
int k; int k;
int state; int state;
int x; int x;
int l; int l;
@ -564,7 +623,7 @@ public class MathUtilsUnitTest extends BaseTest {
this.n = n; this.n = n;
this.y = n - 1; this.y = n - 1;
this.k = 1; this.k = 1;
this.a = new ArrayList<Integer>(); this.a = new ArrayList<>();
for ( int i = 0; i < n; i++ ) { for ( int i = 0; i < n; i++ ) {
this.a.add(i); this.a.add(i);
} }
@ -588,9 +647,9 @@ public class MathUtilsUnitTest extends BaseTest {
} }
if ( this.state == 1 ) { if ( this.state == 1 ) {
while ( 2*x <= y ) { while ( 2 * x <= y ) {
this.a.set(k,x); this.a.set(k,x);
this.y -= x; this.y -= (int) x;
this.k++; this.k++;
} }
this.l = 1+this.k; this.l = 1+this.k;
@ -620,7 +679,7 @@ public class MathUtilsUnitTest extends BaseTest {
} }
public String toString() { public String toString() {
StringBuffer buf = new StringBuffer(); final StringBuilder buf = new StringBuilder();
buf.append("{ "); buf.append("{ ");
while ( hasNext() ) { while ( hasNext() ) {
buf.append("["); buf.append("[");
@ -671,12 +730,7 @@ public class MathUtilsUnitTest extends BaseTest {
} }
private int[] clone(int[] arr) { private int[] clone(int[] arr) {
int[] a = new int[arr.length]; return Arrays.copyOf(arr, arr.length);
for ( int idx = 0; idx < a.length ; idx ++) {
a[idx] = arr[idx];
}
return a;
} }
private int[] nextFromPartitioner() { private int[] nextFromPartitioner() {
@ -834,7 +888,7 @@ public class MathUtilsUnitTest extends BaseTest {
for ( double[] alleles : testAlleles ) { for ( double[] alleles : testAlleles ) {
for ( int count : numAlleleSampled ) { for ( int count : numAlleleSampled ) {
// test that everything sums to one. Generate all multinomial draws // test that everything sums to one. Generate all multinomial draws
List<Double> likelihoods = new ArrayList<Double>(100000); List<Double> likelihoods = new ArrayList<>(100000);
NextCounts generator = new NextCounts(alleles.length,count); NextCounts generator = new NextCounts(alleles.length,count);
double maxLog = Double.MIN_VALUE; double maxLog = Double.MIN_VALUE;
//List<String> countLog = new ArrayList<String>(200); //List<String> countLog = new ArrayList<String>(200);

View File

@ -1627,4 +1627,82 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
Assert.assertEquals(result.getGenotype(expectedGenotype.getSampleName()).toString(), expectedGenotype.toString()); Assert.assertEquals(result.getGenotype(expectedGenotype.getSampleName()).toString(), expectedGenotype.toString());
} }
} }
}
// Exhaustively exercises GATKVariantContextUtils.indexOfAllele / indexOfAltAllele over
// every combination of (ignoreRefState, considerRefAllele, useEquals) for alleles that
// are in the context, ref-flipped copies of them, and alleles that are absent.
@Test(dataProvider = "indexOfAlleleData")
public void testIndexOfAllele(final Allele reference, final List<Allele> altAlleles, final List<Allele> otherAlleles) {
// Build a variant context whose allele list is [reference, altAlleles...], so the
// expected absolute index of alleles.get(i) is simply i.
final List<Allele> alleles = new ArrayList<>(altAlleles.size() + 1);
alleles.add(reference);
alleles.addAll(altAlleles);
final VariantContext vc = makeVC("Source", alleles);
for (int i = 0; i < alleles.size(); i++) {
// Present alleles are found at index i regardless of ignoreRefState / useEquals.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,true,false),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,true,false),i);
// A ref-flipped copy only matches under equals with ignoreRefState, never under ==.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i),true),true,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i),true),true,true,false),-1);
if (i == 0) {
// The reference allele must be invisible when considerRefAllele is false.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,false,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,false,false),-1);
// A fresh ref allele with the same bases matches only when its ref state agrees.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i).getBases(),true),false,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i).getBases(),false),false,true,true),-1);
} else {
// Alternative alleles get relative index i - 1 from indexOfAltAllele.
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,alleles.get(i),true),i - 1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,alleles.get(i),false), i - 1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,Allele.create(alleles.get(i),true),true),i-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,Allele.create(alleles.get(i),true),false),-1);
}
}
// Alleles not present in the context are never found under any flag combination.
for (final Allele other : otherAlleles) {
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,true,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,true,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,true,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,true,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,false,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,false,false),-1);
}
}
// Supplies every 2^5 subset of five candidate alternative alleles: for each bitmask
// the set bits select the alleles included in the variant context and the clear bits
// the alleles that must NOT be found. Reference is always Aref.
@DataProvider(name = "indexOfAlleleData")
public Iterator<Object[]> indexOfAlleleData() {
    final Allele[] ALTERNATIVE_ALLELES = new Allele[] { T, C, G, ATC, ATCATC};
    final int lastMask = 0x1F;
    return new Iterator<Object[]>() {
        private int mask = 0;

        @Override
        public boolean hasNext() {
            return mask <= lastMask;
        }

        @Override
        public Object[] next() {
            int bits = mask++;
            final List<Allele> included = new ArrayList<>(5);
            final List<Allele> excluded = new ArrayList<>(5);
            // Bit i of the mask decides whether candidate i is included or excluded.
            for (final Allele candidate : ALTERNATIVE_ALLELES) {
                ((bits & 1) == 1 ? included : excluded).add(candidate);
                bits >>= 1;
            }
            return new Object[] { Aref, included, excluded };
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
}