gVCF: emit <NON_REF> in all VCF lines, including variant ones, when --ERC gVCF is requested.

Changes:
-------

  The <NON_REF> likelihood at variant sites is calculated as the maximum possible likelihood for an unseen alternative allele: for each read it is calculated as the second-best likelihood amongst the reported alleles.

  When --ERC gVCF is requested, stand_conf_emit and stand_conf_call are forcibly set to 0. Also, dontGenotype is set to false for consistency's sake.

  Integration test MD5 have been changed accordingly.

Additional fix:
--------------

  Especially after adding the <NON_REF> allele — though it also happened without it — QUAL values tend to go to 0 (a very large integer number in log10 space) due to underflow when combining GLs (GenotypingEngine.combineGLs). To fix this, combineGLs has been replaced by combineGLsPrecise, which uses the log-sum-exp trick.

  In just a few cases this change results in genotype changes in integration tests, but after double-checking — using unit tests and the difference between combineGLs and combineGLsPrecise in the affected integration tests — the previous GT calls were either borderline cases or due to the underflow.
This commit is contained in:
Valentin Ruano-Rubio 2013-12-19 16:37:47 -05:00 committed by Eric Banks
parent 383a4f4a70
commit 89c4e57478
19 changed files with 762 additions and 246 deletions

View File

@ -422,14 +422,14 @@ public class UnifiedGenotyperEngine {
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
}
AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
final AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
// is the most likely frequency conformation AC=0 for all alternate alleles?
boolean bestGuessIsRef = true;
// determine which alternate alleles have AF>0
final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
final List<Allele> myAlleles = new ArrayList<>(vc.getAlleles().size());
final List<Integer> alleleCountsofMLE = new ArrayList<>(vc.getAlleles().size());
myAlleles.add(vc.getReference());
for ( int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++ ) {
final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
@ -439,16 +439,13 @@ public class UnifiedGenotyperEngine {
// Compute if the site is considered polymorphic with sufficient confidence relative to our
// phred-scaled emission QUAL
final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
final boolean toInclude = isNonRef || alternateAllele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE ||
UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
UAC.annotateAllSitesWithPLs;
// if the most likely AC is not 0, then this is a good alternate allele to use
if ( isNonRef ) {
myAlleles.add(alternateAllele);
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
bestGuessIsRef = false;
}
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
UAC.annotateAllSitesWithPLs) {
bestGuessIsRef &= !isNonRef;
if (toInclude) {
myAlleles.add(alternateAllele);
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
}
@ -513,29 +510,29 @@ public class UnifiedGenotyperEngine {
// the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
List<Allele> allAllelesToUse = builder.make().getAlleles();
final List<Allele> allAllelesToUse = builder.make().getAlleles();
// the forward lod
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
final VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
final AFCalcResult forwardAFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
final VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
final AFCalcResult reverseAFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
// strand score is max bias between forward and reverse strands

View File

@ -48,7 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.*;
@ -67,10 +70,10 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
final int numChr = 2*numSamples;
// queue of AC conformations to process
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
final LinkedList<ExactACset> ACqueue = new LinkedList<>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>(numChr+1);
// add AC=0 to the queue
final int[] zeroCounts = new int[numAlternateAlleles];
@ -84,7 +87,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
// clean up memory
indexesToACset.remove(set.getACcounts());
@ -95,58 +98,28 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
}
@Override
protected VariantContext reduceScope(final VariantContext vc) {
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles() ) {
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
VariantContextBuilder builder = new VariantContextBuilder(vc);
List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
builder.alleles(alleles);
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL));
return builder.make();
} else {
return vc;
}
@Override
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
}
private static final int PL_INDEX_OF_HOM_REF = 0;
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
@Override
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), true);
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
final int alleleLikelihoodIndex1 = alleles.alleleIndex1 - 1;
final int alleleLikelihoodIndex2 = alleles.alleleIndex2 - 1;
if ( alleles.alleleIndex1 != 0 )
likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
likelihoodSums[alleleLikelihoodIndex1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
// don't double-count it
if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 )
likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
likelihoodSums[alleleLikelihoodIndex2].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
}
}
// sort them by probability mass and choose the best ones
Collections.sort(Arrays.asList(likelihoodSums));
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( int i = 0; i < numAllelesToChoose; i++ )
bestAlleles.add(likelihoodSums[i].allele);
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( bestAlleles.contains(allele) )
orderedBestAlleles.add(allele);
}
return orderedBestAlleles;
}
private static final class DependentSet {
@ -199,8 +172,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
if ( ACwiggle > 1 ) {
final ArrayList<DependentSet> differentAlleles = new ArrayList<DependentSet>(numAltAlleles * numAltAlleles);
final ArrayList<DependentSet> sameAlleles = new ArrayList<DependentSet>(numAltAlleles);
final ArrayList<DependentSet> differentAlleles = new ArrayList<>(numAltAlleles * numAltAlleles);
final ArrayList<DependentSet> sameAlleles = new ArrayList<>(numAltAlleles);
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {

View File

@ -48,17 +48,50 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.*;
import java.util.ArrayList;
import java.util.*;
/**
* Uses the Exact calculation of Heng Li
*/
abstract class ExactAFCalc extends AFCalc {
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
/**
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
@Override
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
return - Double.compare(o1.sum,o2.sum);
}
};
/**
 * Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances
 * so that those with a higher likelihood sum come first, while making sure that the
 * NON_REF symbolic allele is always placed last.
 *
 * <p>Note: both operands being NON_REF is handled explicitly (returning 0) so the comparator
 * honors the {@link java.util.Comparator} general contract (reflexivity/antisymmetry); the
 * previous version returned 1 for that case, which could make sorting throw.</p>
 */
protected static final Comparator<LikelihoodSum> LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
    @Override
    public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
        final boolean o1IsNonRef = o1.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
        final boolean o2IsNonRef = o2.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
        if (o1IsNonRef && o2IsNonRef)
            return 0;                 // contract: compare(a,a) must be 0.
        else if (o1IsNonRef)
            return 1;                 // NON_REF sorts after everything else.
        else if (o2IsNonRef)
            return -1;
        else
            return o1.compareTo(o2);  // otherwise, higher likelihood sum first.
    }
};
/**
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
* the likelihood sum.
*/
protected static final Comparator<LikelihoodSum> LIKELIHOOD_INDEX_COMPARATOR = new Comparator<LikelihoodSum>() {
@Override
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
return Integer.compare(o1.index, o2.index);
}
};
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
super(nSamples, maxAltAlleles, ploidy);
@ -69,9 +102,10 @@ abstract class ExactAFCalc extends AFCalc {
*/
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
public double sum = 0.0;
public Allele allele;
public final Allele allele;
public final int index;
public LikelihoodSum(Allele allele) { this.allele = allele; }
public LikelihoodSum(final Allele allele, final int index) { this.allele = allele; this.index = index; }
public int compareTo(LikelihoodSum other) {
final double diff = sum - other.sum;
@ -85,12 +119,12 @@ abstract class ExactAFCalc extends AFCalc {
* @return ArrayList of doubles corresponding to GL vectors
*/
protected static ArrayList<double[]> getGLs(final GenotypesContext GLs, final boolean includeDummy) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size() + 1);
final ArrayList<double[]> genotypeLikelihoods = new ArrayList<>(GLs.size() + 1);
if ( includeDummy ) genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector();
final double[] gls = sample.getLikelihoods().getAsVector();
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls);
@ -99,4 +133,108 @@ abstract class ExactAFCalc extends AFCalc {
return genotypeLikelihoods;
}
@Override
protected VariantContext reduceScope(final VariantContext vc) {
    // Don't try to genotype too many alternate alleles: prune down to the most likely ones.
    final List<Allele> inputAltAlleles = vc.getAlternateAlleles();
    final List<Allele> outputAltAlleles = reduceScopeAlleles(vc, getMaxAltAlleles());

    // Only if the output allele set has shrunk relative to the input alt allele set do we care.
    final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size();
    if (altAlleleReduction == 0)
        return vc;

    // Sanity check before doing any work: a reduction can never add alleles.
    // (Previously this check sat after the builder work, and a third, unreachable
    // else-branch duplicated it.)
    if (altAlleleReduction < 0)
        throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + -altAlleleReduction + " " + vc);

    logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles()
            + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart()
            + " has " + (vc.getAlternateAlleles().size())
            + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");

    // Reuse the already-computed reduced allele list; the original code invoked
    // reduceScopeAlleles a second time here, repeating the likelihood-sum work.
    final List<Allele> alleles = new ArrayList<>(outputAltAlleles.size() + 1);
    alleles.add(vc.getReference());
    alleles.addAll(outputAltAlleles);

    final VariantContextBuilder builder = new VariantContextBuilder(vc);
    builder.alleles(alleles);
    builder.genotypes(reduceScopeGenotypes(vc, alleles));
    return builder.make();
}
/**
 * Returns the new (possibly reduced) set of alternative alleles to use.
 *
 * <p>The &lt;NON_REF&gt; symbolic allele, if present, is never pruned: it is excluded from the
 * likelihood-based competition and re-appended to the result.</p>
 *
 * @param vc target variant context.
 * @param numAllelesToChoose number of (proper, non-&lt;NON_REF&gt;) alternative alleles to keep.
 * @return the list of alternative alleles to keep, in the original alt-allele index order.
 */
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) {

    // Look for the <NON_REF> allele to exclude it from the pruning if present.
    final int numOriginalAltAlleles = vc.getAlternateAlleles().size();

    final int nonRefAltAlleleIndex = GATKVariantContextUtils.indexOfAltAllele(vc,
            GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE, false);
    final boolean nonRefAltAllelePresent = nonRefAltAlleleIndex >= 0;

    // <NON_REF> should not be considered in the downsizing, so we need to count it out when
    // considering if alt. allele downsizing is required.
    final int numProperOriginalAltAlleles = numOriginalAltAlleles - (nonRefAltAllelePresent ? 1 : 0);

    // Avoid pointless allele reduction:
    if (numAllelesToChoose >= numProperOriginalAltAlleles)
        return vc.getAlternateAlleles();

    // One LikelihoodSum per alt allele; 'index' records the original alt-allele position so we
    // can restore input order after sorting by likelihood.
    final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
    for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
        final Allele allele = vc.getAlternateAllele(i);
        likelihoodSums[i] = new LikelihoodSum(allele,i);
    }

    // Calculate the allele likelihood sums (subclass-specific: diploid vs. general ploidy).
    reduceScopeCalculateLikelihoodSums(vc, likelihoodSums);

    // Sort them by probability mass and choose the best ones.
    // Make sure that the <NON_REF> allele is last if present.
    Collections.sort(Arrays.asList(likelihoodSums), nonRefAltAllelePresent ? LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR : LIKELIHOOD_SUM_COMPARATOR);

    // We need to return the best likelihood alleles in the original alternative allele index order.
    // This heap will keep track of that index order.
    final PriorityQueue<LikelihoodSum> mostLikelyAllelesHeapByIndex = new PriorityQueue<>(numOriginalAltAlleles, LIKELIHOOD_INDEX_COMPARATOR);
    for ( int i = 0; i < numAllelesToChoose; i++ )
        mostLikelyAllelesHeapByIndex.add(likelihoodSums[i]);

    // <NON_REF> is guaranteed not to have been added above (it sorts last and reduction was
    // needed, so numAllelesToChoose < numProperOriginalAltAlleles); add it back explicitly.
    if (nonRefAltAllelePresent)
        mostLikelyAllelesHeapByIndex.add(likelihoodSums[nonRefAltAlleleIndex]);

    final ArrayList<Allele> orderedBestAlleles = new ArrayList<>(numAllelesToChoose);

    // Drain the heap: alleles come out ordered by their original alt-allele index.
    while (!mostLikelyAllelesHeapByIndex.isEmpty())
        orderedBestAlleles.add(mostLikelyAllelesHeapByIndex.remove().allele);

    return orderedBestAlleles;
}
protected static final int PL_INDEX_OF_HOM_REF = 0;
/**
* Update the likelihood sums with using the variant context genotype likelihoods.
* @param vc source variant context.
* @param likelihoodSums where to update the likelihood sums.
*/
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums);
/**
* Transforms the genotypes of the variant context according to the new subset of possible alleles.
*
* @param vc original variant-context.
* @param allelesToUse possible alleles.
* @return never {@code null}, the new set of genotype calls for the reduced scope.
*/
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse);
}

View File

@ -59,7 +59,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
private final int ploidy;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
@ -68,22 +68,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
}
@Override
protected VariantContext reduceScope(VariantContext vc) {
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles()) {
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
final List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles(), ploidy));
VariantContextBuilder builder = new VariantContextBuilder(vc);
builder.alleles(alleles);
builder.genotypes(subsetAlleles(vc, alleles, false, ploidy));
return builder.make();
} else {
return vc;
}
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
return subsetAlleles(vc,allelesToUse,false,ploidy);
}
@Override
@ -105,8 +91,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
public CombinedPoolLikelihoods() {
// final int numElements = GenotypeLikelihoods.numLikelihoods();
alleleCountSetList = new LinkedList<ExactACset>();
conformationMap = new HashMap<ExactACcounts,ExactACset>();
alleleCountSetList = new LinkedList<>();
conformationMap = new HashMap<>();
maxLikelihood = Double.NEGATIVE_INFINITY;
}
@ -136,54 +122,22 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
public int getLength() {
return alleleCountSetList.size();
}
}
}
/**
*
* Chooses N most likely alleles in a set of pools (samples) based on GL sum over alt alleles
* @param vc Input variant context
* @param numAllelesToChoose Number of alleles to choose
* @param ploidy Ploidy per pool
* @return list of numAllelesToChoose most likely alleles
*/
private static final int PL_INDEX_OF_HOM_REF = 0;
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose, int ploidy) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
@Override
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
final int numOriginalAltAlleles = likelihoodSums.length;
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), false);
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
for (int k=1; k < acCount.length;k++)
if (acCount[k] > 0 )
likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]);
}
}
// sort them by probability mass and choose the best ones
Collections.sort(Arrays.asList(likelihoodSums));
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( int i = 0; i < numAllelesToChoose; i++ )
bestAlleles.add(likelihoodSums[i].allele);
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( bestAlleles.contains(allele) )
orderedBestAlleles.add(allele);
}
return orderedBestAlleles;
}
/**
* Simple non-optimized version that combines GLs from several pools and produces global AF distribution.
* @param GLs Inputs genotypes context with per-pool GLs
@ -231,9 +185,9 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
int newGLPloidy,
int numAlleles,
final double[] log10AlleleFrequencyPriors) {
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
final LinkedList<ExactACset> ACqueue = new LinkedList<>();
// mapping of ExactACset indexes to the objects
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>();
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>();
final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods();
// add AC=0 to the queue
@ -251,7 +205,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
getStateTracker().incNEvaluations();
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
// clean up memory
indexesToACset.remove(ACset.getACcounts());
@ -514,7 +469,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final int ploidy) {
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
List<Allele> NO_CALL_ALLELES = new ArrayList<Allele>(ploidy);
List<Allele> NO_CALL_ALLELES = new ArrayList<>(ploidy);
for (int k=0; k < ploidy; k++)
NO_CALL_ALLELES.add(Allele.NO_CALL);
@ -584,8 +539,6 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
* @param newLikelihoods the PL array
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
* @param numChromosomes Number of chromosomes per pool
*
* @return genotype
*/
private void assignGenotype(final GenotypeBuilder gb,
final double[] newLikelihoods,
@ -599,8 +552,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
final ArrayList<Double> alleleFreqs = new ArrayList<>();
final ArrayList<Integer> alleleCounts = new ArrayList<>();
for (int k=1; k < mlAlleleCount.length; k++) {
@ -629,6 +582,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
}
gb.alleles(myAlleles);
// TODO - deprecated so what is the appropriate method to call?
if ( numNewAltAlleles > 0 )
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
}

View File

@ -107,6 +107,7 @@ import java.util.*;
* prior for the ith least likely allele.
*/
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
/**
* The min. confidence of an allele to be included in the joint posterior.
*/
@ -249,7 +250,7 @@ import java.util.*;
final int nAlts = rootVC.getNAlleles() - 1;
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
for ( final Genotype g : rootVC.getGenotypes() )
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts));
biallelicGenotypes.add(combineGLsPrecise(g, altAlleleIndex, nAlts));
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
@ -281,6 +282,7 @@ import java.util.*;
*/
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
@Deprecated
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
if ( original.isNonInformative() )
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
@ -316,6 +318,75 @@ import java.util.*;
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
}
// Conversion factor from phred-scaled PL units back to log10 likelihoods (PL = -10 * log10(L)).
private static final double PHRED_2_LOG10_COEFF = -.1;

/**
 * Returns a new Genotype with the PLs of the multi-allelic original reduced to a bi-allelic case.
 *
 * <p>Uses the log-sum-exp trick in order to work well with very low PLs.</p>
 *
 * <p>This is handled in the following way:</p>
 *
 * <p>Suppose we have for a A/B/C site the following GLs:</p>
 *
 * <p>AA AB BB AC BC CC</p>
 *
 * <p>and we want to get the bi-allelic GLs for X/B, where X is everything not B</p>
 *
 * <p>XX = AA + AC + CC (since X = A or C)<br/>
 * XB = AB + BC <br/>
 * BB = BB <br/>
 * </p>
 * <p>
 * This implementation uses the log-sum trick in order to avoid numeric instability
 * (underflow) when combining the likelihoods, unlike the deprecated {@link #combineGLs}.
 * </p>
 *
 * @param original the original multi-allelic genotype
 * @param altIndex the index of the alt allele we wish to keep in the biallelic case -- with ref == 0
 * @param nAlts the total number of alt alleles
 * @return a new biallelic genotype with appropriate PLs
 */
@Requires({"original.hasLikelihoods()"})
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
protected Genotype combineGLsPrecise(final Genotype original, final int altIndex, final int nAlts ) {

    // Non-informative genotypes reduce to the flat non-informative bi-allelic PLs.
    if ( original.isNonInformative() )
        return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();

    if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);

    final int[] pls = original.getPL();

    final int nAlleles = nAlts + 1;

    final int plCount = pls.length;

    // BB: the kept-allele homozygote; XB: heterozygotes involving the kept allele
    // (nAlleles - 1 of them); XX: everything else (plCount - nAlleles entries).
    double BB = 0;
    final double[] XBvalues = new double[nAlleles - 1];
    final double[] XXvalues = new double[plCount - nAlleles];

    int xbOffset = 0;
    int xxOffset = 0;
    // Classify each PL entry by its allele pair and convert it to log10 space.
    for ( int index = 0; index < plCount; index++ ) {
        final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
        int i = pair.alleleIndex1;
        int j = pair.alleleIndex2;
        if (i == j) {
            // Homozygote: either the kept allele (BB) or part of the pooled XX bucket.
            if (i == altIndex) BB = PHRED_2_LOG10_COEFF * pls[index]; else XXvalues[xxOffset++] = PHRED_2_LOG10_COEFF * pls[index];
        } else if (i == altIndex || j == altIndex)
            XBvalues[xbOffset++] = PHRED_2_LOG10_COEFF * pls[index];
        else
            XXvalues[xxOffset++] = PHRED_2_LOG10_COEFF * pls[index];
    }

    // Pool each bucket in log10 space (log-sum-exp) instead of summing raw probabilities,
    // which is what caused the underflow in the deprecated combineGLs.
    final double XB = MathUtils.log10sumLog10(XBvalues);
    final double XX = MathUtils.log10sumLog10(XXvalues);

    final double[] GLs = new double[] { XX, XB, BB};
    return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
}
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
@ -340,7 +411,6 @@ import java.util.*;
return sorted;
}
/**
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
*

View File

@ -57,8 +57,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.EventMap;
import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -136,7 +134,10 @@ public class GenotypingEngine {
* @param activeRegionWindow Active window
* @param genomeLocParser GenomeLocParser
* @param activeAllelesToGenotype Alleles to genotype
* @param addNonRef whether we should add a &lt;NON_REF&gt; alternative allele to the result variation contexts.
*
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
*
*/
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
@Ensures("result != null")
@ -150,7 +151,8 @@ public class GenotypingEngine {
final GenomeLoc activeRegionWindow,
final GenomeLocParser genomeLocParser,
final RefMetaDataTracker tracker,
final List<VariantContext> activeAllelesToGenotype ) {
final List<VariantContext> activeAllelesToGenotype,
final boolean addNonRef) {
// sanity check input arguments
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
@ -183,16 +185,27 @@ public class GenotypingEngine {
final List<String> priorityList = makePriorityList(eventsAtThisLoc);
// Merge the event to find a common reference representation
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
if( mergedVC == null ) { continue; }
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
// this is possible in GGA mode when the same event is represented in multiple input records
throw new UserException("The same event (although possibly represented differently) is present in multiple input records at location " + loc + " and this is not something we can handle at this time. You will need to remove one of the records in order to proceed with your input file(s).");
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
if (addNonRef) {
final List<Allele> alleleList = new ArrayList<>();
alleleList.addAll(mergedVC.getAlleles());
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
vcb.alleles(alleleList);
mergedVC = vcb.make();
}
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
}
@ -204,11 +217,14 @@ public class GenotypingEngine {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() );
if (addNonRef) addMiscellaneousAllele(alleleReadMap);
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
if (addNonRef) addMiscellaneousAllele(alleleReadMap_annotations);
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
@ -218,16 +234,46 @@ public class GenotypingEngine {
}
// maintain the set of all called haplotypes
for ( final Allele calledAllele : call.getAlleles() )
calledHaplotypes.addAll(alleleMapper.get(calledAllele));
for ( final Allele calledAllele : call.getAlleles() ) {
final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
if (haplotypeList == null) continue;
calledHaplotypes.addAll(haplotypeList);
}
returnCalls.add( annotatedCall );
}
}
}
return new CalledHaplotypes(returnCalls, calledHaplotypes);
}
/**
* Add the <NON_REF> allele
* @param stratifiedReadMap target per-read-allele-likelihood-map.
*/
public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
final Allele miscellanoeusAllele = GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
for (Map.Entry<String, PerReadAlleleLikelihoodMap> perSample : stratifiedReadMap.entrySet()) {
for (Map.Entry<GATKSAMRecord, Map<Allele, Double>> perRead : perSample.getValue().getLikelihoodReadMap().entrySet()) {
double bestLikelihood = Double.NEGATIVE_INFINITY;
double secondBestLikelihood = Double.NEGATIVE_INFINITY;
for (Map.Entry<Allele,Double> perAllele : perRead.getValue().entrySet()) {
final double value = perAllele.getValue();
if (value > bestLikelihood) {
secondBestLikelihood = bestLikelihood;
bestLikelihood = value;
} else if (value < bestLikelihood && value > secondBestLikelihood) {
secondBestLikelihood = value;
}
}
final double miscellanousLikelihood = Double.isInfinite(secondBestLikelihood) ? bestLikelihood : secondBestLikelihood;
perSample.getValue().add(perRead.getKey(),miscellanoeusAllele,miscellanousLikelihood);
}
}
return stratifiedReadMap;
}
/**
* Go through the haplotypes we assembled, and decompose them into their constituent variant contexts
*

View File

@ -564,6 +564,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
public void initialize() {
super.initialize();
if (dontGenotype && emitReferenceConfidence == ReferenceConfidenceMode.GVCF)
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel);
@ -615,24 +624,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// where the filters are used. For example, in emitting all sites the lowQual field is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
if ( emitReferenceConfidence() ) {
if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
// a kluge to enforce the use of this indexing strategy
if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
}
try {
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
} catch ( IllegalArgumentException e ) {
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
}
}
}
initializeReferenceConfidenceModel(samples, headerInfo);
vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
@ -688,6 +680,28 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
getToolkit().getGenomeLocParser());
}
    // Sets up the reference confidence model and, when gVCF output is requested, validates the
    // required index settings and wraps the VCF writer so hom-ref sites are banded into blocks.
    private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
        referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
        if ( emitReferenceConfidence() ) {
            // Reference-confidence emission (BP_RESOLUTION / GVCF) is single-sample only for now.
            if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
            headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
            if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
                // a kluge to enforce the use of this indexing strategy
                if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
                        getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
                    throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
                }
                // gVCF must emit every site, so force the emission confidence threshold to 0.
                // NOTE(review): initialize() also appears to set both STANDARD_CONFIDENCE_FOR_EMITTING
                // and STANDARD_CONFIDENCE_FOR_CALLING for GVCF mode — confirm whether this duplicate
                // assignment is still needed.
                SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = 0.0;
                try {
                    vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
                } catch ( IllegalArgumentException e ) {
                    throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
                }
            }
        }
    }
/**
* Instantiates the appropriate likelihood calculation engine.
*
@ -833,10 +847,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap =
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
// was a bad interaction between that selection and the marginalization that happens over each event when computing
// GLs. In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the
@ -852,7 +862,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker,
activeAllelesToGenotype );
activeAllelesToGenotype, emitReferenceConfidence() );
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {
@ -866,8 +876,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if( DEBUG ) { logger.info("----------------------------------------------------------------------------------"); }
if ( emitReferenceConfidence() ) {
if ( calledHaplotypes.getCalls().isEmpty() ) {
if ( !containsCalls(calledHaplotypes) ) {
// no called all of the potential haplotypes
return referenceModelForNoVariation(originalActiveRegion, false);
} else
@ -879,6 +890,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
}
}
private boolean containsCalls(final GenotypingEngine.CalledHaplotypes calledHaplotypes) {
final List<VariantContext> calls = calledHaplotypes.getCalls();
if (calls.isEmpty()) return false;
for (final VariantContext call : calls)
for (final Genotype genotype : call.getGenotypes())
if (genotype.isCalled())
return true;
return false;
}
/**
* High-level function that runs the assembler on the active region reads,
* returning a data structure with the resulting information needed

View File

@ -48,7 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -63,9 +66,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFFormatHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFSimpleHeaderLine;
import java.io.File;

View File

@ -46,8 +46,6 @@
package org.broadinstitute.sting.utils.gvcf;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.ReferenceConfidenceModel;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
@ -56,7 +54,10 @@ import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.*;
import java.util.*;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
/**
* Genome-wide VCF writer
@ -287,7 +288,7 @@ public class GVCFWriter implements VariantContextWriter {
}
final Genotype g = vc.getGenotype(0);
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) ) {
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) && vc.isBiallelic() ) {
// create bands
final VariantContext maybeCompletedBand = addHomRefSite(vc, g);
if ( maybeCompletedBand != null ) underlyingWriter.add(maybeCompletedBand);

View File

@ -118,7 +118,6 @@ final class HomRefBlock {
if ( g == null ) throw new IllegalArgumentException("g cannot be null");
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
if ( ! g.hasDP() ) throw new IllegalArgumentException("g must have DP field");
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
@ -136,7 +135,7 @@ final class HomRefBlock {
}
stop = pos;
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
DPs.add(g.getDP());
DPs.add(Math.max(g.getDP(),0));
}
/**

View File

@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("e7cb959912ea964bf9c897904aa5220b"));
Arrays.asList("f5a62ecb8d32f6161b2ac7682c9f711d"));
executeTest("test Multiple SNP alleles", spec);
}
@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("f29b3fa9d5642297cfc4b10aa2137c68"));
Arrays.asList("8897652c7516a91d22bc678f2189131e"));
executeTest("test mismatched PLs", spec);
}
}

View File

@ -106,14 +106,22 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
}
@Test(enabled = true, dataProvider = "TestCombineGLs")
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
public void testCombineGLsPrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts);
final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
Assert.assertEquals(combined.getPL(), expected.getPL(),
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
}
    // NOTE(review): this test is an exact duplicate of testCombineGLsPrecise above (same data
    // provider, same call under test, same assertion) — consider removing one of the two.
    @Test(enabled = true, dataProvider = "TestCombineGLs")
    public void testCombinePrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
        final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
        final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
        Assert.assertEquals(combined.getPL(), expected.getPL(),
                "Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
    }
static Allele A = Allele.create("A", true);
static Allele C = Allele.create("C");

View File

@ -57,9 +57,13 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.haplotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
@ -279,6 +283,60 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap));
}
    // NOTE(review): disabled — 'updatedVc' is hard-coded to null (the call to
    // GenotypingEngine.addMiscellaneousAllele is commented out), so enabling this test as-is
    // would NPE immediately. Re-enable once the engine exposes a suitable entry point.
    @Test(dataProvider="AddMiscellaneousDataProvider", enabled=false)
    public void testAddMiscellaneousAllele(final String readBases, final int readOffset,
                                           final String ref, final int refOffset,
                                           final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
        // Build a single artificial read over the test bases at uniform quality 30.
        final byte baseQual = (byte)30;
        final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
        final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
        final GenomeLoc loc = new UnvalidatingGenomeLoc("20",0,refOffset,refOffset);
        final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc,Collections.singletonList(read),readOffset);
        final VariantContextBuilder vcb = new VariantContextBuilder();
        final GenotypeBuilder gb = new GenotypeBuilder();
        // Allele list: reference first, then the alternatives from the data provider.
        final List<String> alleleStrings = new ArrayList<>( 1 + alternatives.length);
        alleleStrings.add(referenceAllele);
        alleleStrings.addAll(Arrays.asList(alternatives));

        gb.AD(new int[] { 1 });
        gb.DP(1);
        gb.PL(likelihoods);

        vcb.alleles(alleleStrings);
        vcb.loc("20",refOffset,refOffset + referenceAllele.length() -1);
        vcb.genotypes(gb.make());

        final VariantContext vc = vcb.make();
        final VariantContext updatedVc = null; // GenotypingEngine.addMiscellaneousAllele(vc,pileup,ref.getBytes(),0);

        final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
        Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
        final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
        // Element-wise comparison of the updated GL vector against the expectation.
        for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
            Assert.assertEquals(updatedLikelihoodsArray[i],expected[i],0.0001);
        }
        // The <NON_REF> symbolic allele must appear amongst the updated context's alleles.
        Allele altAllele = null;
        for (final Allele allele : updatedVc.getAlleles())
            if (allele.isSymbolic() && allele.getBaseString().equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME))
                altAllele = allele;
        Assert.assertNotNull(altAllele);
    }
@DataProvider(name="AddMiscellaneousDataProvider")
public Iterator<Object[]> addMiscellaneousAlleleDataProvider() {
return Arrays.asList(ADD_MISCELLANEOUS_ALLELE_DATA).iterator();
}
private static final double MATCH_LnLK = QualityUtils.qualToProbLog10((byte)30);
private static final double MISS_LnLK = QualityUtils.qualToErrorProbLog10((byte)30);
private static final Object[][] ADD_MISCELLANEOUS_ALLELE_DATA = new Object[][] {
new Object[] {"ACTG", 0,"ACTGTGAGTATTCC",0,"A",new String[]{}, new double[] {MATCH_LnLK * MATCH_LnLK}, 6,
new double[] {MATCH_LnLK * MATCH_LnLK,MATCH_LnLK * MISS_LnLK, MISS_LnLK * MISS_LnLK}}
};
/**
* Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext
*/

View File

@ -66,11 +66,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4bb8e44b2d04757f8b11ca6400828357"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "8ddc291584f56e27d125b6a0523f2703"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "1dfe1fc8079938adf1565450671094d4"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d98245380500a6decfc26dcaadb2c4d2"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "e53e164cc3f5cbd5fba083f2cdb98a88"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "a258dbbfabe88dad11d57151cd256831"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "c4c28e74eda133f99e0864ad16c965c4"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "e4d36f165b2ddbb923d3c9a402e96f1b"});
return tests.toArray(new Object[][]{});
}

View File

@ -48,10 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -61,10 +58,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.variant.variantcontext.GenotypeType;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;

View File

@ -492,13 +492,62 @@ public class MathUtils {
return result;
}
private static final double LOG1MEXP_THRESHOLD = Math.log(0.5);
private static final double LN_10 = Math.log(10);
/**
* Calculates {@code log(1-exp(a))} without loosing precision.
*
* <p>
* This is based on the approach described in:
*
* </p>
* <p>
* Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012 <br/>
* <a ref="http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf">Online document</a>.
*
* </p>
*
* @param a the input exponent.
* @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
*/
public static double log1mexp(final double a) {
if (a > 0) return Double.NaN;
if (a == 0) return Double.NEGATIVE_INFINITY;
return (a < LOG1MEXP_THRESHOLD) ? Math.log1p(-Math.exp(a)) : Math.log(-Math.expm1(a));
}
/**
* Calculates {@code log10(1-10^a)} without loosing precision.
*
* <p>
* This is based on the approach described in:
*
* </p>
* <p>
* Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012 <br/>
* <a ref="http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf">Online document</a>.
* </p>
*
* @param a the input exponent.
* @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
*/
public static double log10OneMinusPow10(final double a) {
if (a > 0) return Double.NaN;
if (a == 0) return Double.NEGATIVE_INFINITY;
final double b = a * LN_10;
return log1mexp(b) / LN_10;
}
/**
* Calculates the log10 of the multinomial coefficient. Designed to prevent
* overflows even with very large numbers.
*
* @param n total number of trials
* @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km)
     * @return the log10 of the multinomial coefficient.
*/
public static double log10MultinomialCoefficient(final int n, final int[] k) {
if ( n < 0 )

View File

@ -2115,4 +2115,79 @@ public class GATKVariantContextUtils {
if ( ! vc1.getAlleles().equals(vc2.getAlleles()) ) return false;
return true;
}
/**
* Returns the absolute 0-based index of an allele.
*
* <p/>
* If the allele is equal to the reference, the result is 0, if it equal to the first alternative the result is 1
* and so forth.
* <p/>
* Therefore if you want the 0-based index within the alternative alleles you need to do the following:
*
* <p/>
* You can indicate whether the Java object reference comparator {@code ==} can be safelly used by setting {@code useEquals} to {@code false}.
*
* @param vc the target variant context.
* @param allele the target allele.
* @param ignoreRefState whether the reference states of the allele is important at all. Has no effect if {@code useEquals} is {@code false}.
* @param considerRefAllele whether the reference allele should be considered. You should set it to {@code false} if you are only interested in alternative alleles.
* @param useEquals whether equal method should be used in the search: {@link Allele#equals(Allele,boolean)}.
*
* @throws IllegalArgumentException if {@code allele} is {@code null}.
* @return {@code -1} if there is no such allele that satify those criteria, a value between 0 and {@link VariantContext#getNAlleles()} {@code -1} otherwise.
*/
public static int indexOfAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState, final boolean considerRefAllele, final boolean useEquals) {
if (allele == null) throw new IllegalArgumentException();
return useEquals ? indexOfEqualAllele(vc,allele,ignoreRefState,considerRefAllele) : indexOfSameAllele(vc,allele,considerRefAllele);
}
/**
* Returns the relative 0-based index of an alternative allele.
* <p/>
* The the query allele is the same as the first alternative allele, the result is 0,
* if it is equal to the second 1 and so forth.
*
*
* <p/>
* Notice that the ref-status of the query {@code allele} is ignored.
*
* @param vc the target variant context.
* @param allele the query allele.
* @param useEquals whether equal method should be used in the search: {@link Allele#equals(Allele,boolean)}.
*
* @throws IllegalArgumentException if {@code allele} is {@code null}.
*
* @return {@code -1} if there is no such allele that satify those criteria, a value between 0 and the number
* of alternative alleles - 1.
*/
public static int indexOfAltAllele(final VariantContext vc, final Allele allele, final boolean useEquals) {
final int absoluteIndex = indexOfAllele(vc,allele,true,false,useEquals);
return absoluteIndex == -1 ? -1 : absoluteIndex - 1;
}
// Impements index search using equals.
private static int indexOfEqualAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState,
final boolean considerRefAllele) {
int i = 0;
for (final Allele a : vc.getAlleles())
if (a.equals(allele,ignoreRefState))
return i == 0 ? (considerRefAllele ? 0 : -1) : i;
else
i++;
return -1;
}
// Implements index search using ==.
private static int indexOfSameAllele(final VariantContext vc, final Allele allele, final boolean considerRefAllele) {
int i = 0;
for (final Allele a : vc.getAlleles())
if (a == allele)
return i == 0 ? (considerRefAllele ? 0 : -1) : i;
else
i++;
return -1;
}
}

View File

@ -39,18 +39,19 @@ import java.util.*;
* Basic unit test for MathUtils
*/
public class MathUtilsUnitTest extends BaseTest {
    @BeforeClass
    public void init() {
        // Intentionally empty: no per-class fixture setup is currently required.
    }
/**
* Tests that we get unqiue values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}.
* Tests that we get unique values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}.
*/
@Test
public void testGenerateUniqueHashFromThreePositiveIntegers() {
logger.warn("Executing testGenerateUniqueHashFromThreePositiveIntegers");
final Set<Long> observedLongs = new HashSet<Long>();
final Set<Long> observedLongs = new HashSet<>();
for (short i = 0; i < Byte.MAX_VALUE; i++) {
for (short j = 0; j < Byte.MAX_VALUE; j++) {
for (short k = 0; k < Byte.MAX_VALUE; k++) {
@ -72,6 +73,80 @@ public class MathUtilsUnitTest extends BaseTest {
}
}
@Test(dataProvider = "log10OneMinusPow10Data")
public void testLog10OneMinusPow10(final double x, final double expected) {
final double actual = MathUtils.log10OneMinusPow10(x);
if (Double.isNaN(expected))
Assert.assertTrue(Double.isNaN(actual));
else
Assert.assertEquals(actual,expected,1E-9);
}
/**
 * Checks {@code MathUtils.log1mexp} against the naively computed expected value
 * from the data provider; NaN inputs must produce NaN.
 */
@Test(dataProvider = "log1mexpData")
public void testLog1mexp(final double x, final double expected) {
    final double observed = MathUtils.log1mexp(x);
    // Out-of-domain cases: both sides must be NaN.
    if (Double.isNaN(expected)) {
        Assert.assertTrue(Double.isNaN(observed));
        return;
    }
    Assert.assertEquals(observed, expected, 1E-9);
}
/**
 * Cases for {@code testLog10OneMinusPow10}: each entry pairs an input x with the
 * naively computed reference value log10(1 - 10^x) (NaN where undefined).
 */
@DataProvider(name = "log10OneMinusPow10Data")
public Iterator<Object[]> log10OneMinusPow10Data() {
    final double[] inValues = new double[] { Double.NaN, 10, 1, 0, -1, -3, -10, -30, -100, -300, -1000, -3000 };
    // Build the cases eagerly; same elements in the same order as before.
    final List<Object[]> cases = new ArrayList<>(inValues.length);
    for (final double input : inValues)
        cases.add(new Object[] { input, Math.log10( 1 - Math.pow(10,input)) });
    return cases.iterator();
}
/**
 * Cases for {@code testLog1mexp}: each entry pairs an input x with the naively
 * computed reference value ln(1 - e^x) (NaN where undefined).
 */
@DataProvider(name = "log1mexpData")
public Iterator<Object[]> log1mexpData() {
    final double[] inValues = new double[] { Double.NaN, 10, 1, 0, -1, -3, -10, -30, -100, -300, -1000, -3000 };
    // Build the cases eagerly; same elements in the same order as before.
    final List<Object[]> cases = new ArrayList<>(inValues.length);
    for (final double input : inValues)
        cases.add(new Object[] { input, Math.log( 1 - Math.exp(input)) });
    return cases.iterator();
}
/**
* Tests that we get the right values from the binomial distribution
*/
@ -144,7 +219,7 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertTrue(MathUtils.sampleIndicesWithReplacement(5, 1000).size() == 1000);
// Check that the list contains only the k element range that as asked for - no more, no less
List<Integer> Five = new ArrayList<Integer>();
List<Integer> Five = new ArrayList<>();
Collections.addAll(Five, 0, 1, 2, 3, 4);
List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000);
Assert.assertTrue(BigFive.containsAll(Five));
@ -160,9 +235,9 @@ public class MathUtilsUnitTest extends BaseTest {
// Check that the list contains only the k element range that as asked for - no more, no less but now
// use the index list to pull elements from another list using sliceListByIndices
List<Integer> Five = new ArrayList<Integer>();
List<Integer> Five = new ArrayList<>();
Collections.addAll(Five, 0, 1, 2, 3, 4);
List<Character> FiveAlpha = new ArrayList<Character>();
List<Character> FiveAlpha = new ArrayList<>();
Collections.addAll(FiveAlpha, 'a', 'b', 'c', 'd', 'e');
List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000);
List<Character> BigFiveAlpha = MathUtils.sliceListByIndices(BigFive, FiveAlpha);
@ -180,8 +255,8 @@ public class MathUtilsUnitTest extends BaseTest {
int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
MathUtils.RunningAverage r = new MathUtils.RunningAverage();
for (int i = 0; i < numbers.length; i++)
r.add((double) numbers[i]);
for (final double b : numbers)
r.add(b);
Assert.assertEquals((long) numbers.length, r.observationCount());
Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
@ -253,24 +328,6 @@ public class MathUtilsUnitTest extends BaseTest {
}
}
/**
* Private functions used by testArrayShuffle()
*/
private boolean hasUniqueElements(Object[] x) {
for (int i = 0; i < x.length; i++)
for (int j = i + 1; j < x.length; j++)
if (x[i].equals(x[j]) || x[i] == x[j])
return false;
return true;
}
private boolean hasAllElements(final Object[] expected, final Object[] actual) {
HashSet<Object> set = new HashSet<Object>();
set.addAll(Arrays.asList(expected));
set.removeAll(Arrays.asList(actual));
return set.isEmpty();
}
@Test
public void testApproximateLog10SumLog10() {
@ -451,7 +508,7 @@ public class MathUtilsUnitTest extends BaseTest {
@DataProvider(name = "ArrayMinData")
public Object[][] makeArrayMinData() {
List<Object[]> tests = new ArrayList<Object[]>();
List<Object[]> tests = new ArrayList<>();
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{Arrays.asList(10), 10});
@ -554,9 +611,11 @@ public class MathUtilsUnitTest extends BaseTest {
// generate the partitions of an integer, each partition sorted numerically
int n;
List<Integer> a;
int y;
int k;
int state;
int x;
int l;
@ -564,7 +623,7 @@ public class MathUtilsUnitTest extends BaseTest {
this.n = n;
this.y = n - 1;
this.k = 1;
this.a = new ArrayList<Integer>();
this.a = new ArrayList<>();
for ( int i = 0; i < n; i++ ) {
this.a.add(i);
}
@ -588,9 +647,9 @@ public class MathUtilsUnitTest extends BaseTest {
}
if ( this.state == 1 ) {
while ( 2*x <= y ) {
while ( 2 * x <= y ) {
this.a.set(k,x);
this.y -= x;
this.y -= (int) x;
this.k++;
}
this.l = 1+this.k;
@ -620,7 +679,7 @@ public class MathUtilsUnitTest extends BaseTest {
}
public String toString() {
StringBuffer buf = new StringBuffer();
final StringBuilder buf = new StringBuilder();
buf.append("{ ");
while ( hasNext() ) {
buf.append("[");
@ -671,12 +730,7 @@ public class MathUtilsUnitTest extends BaseTest {
}
private int[] clone(int[] arr) {
int[] a = new int[arr.length];
for ( int idx = 0; idx < a.length ; idx ++) {
a[idx] = arr[idx];
}
return a;
return Arrays.copyOf(arr, arr.length);
}
private int[] nextFromPartitioner() {
@ -834,7 +888,7 @@ public class MathUtilsUnitTest extends BaseTest {
for ( double[] alleles : testAlleles ) {
for ( int count : numAlleleSampled ) {
// test that everything sums to one. Generate all multinomial draws
List<Double> likelihoods = new ArrayList<Double>(100000);
List<Double> likelihoods = new ArrayList<>(100000);
NextCounts generator = new NextCounts(alleles.length,count);
double maxLog = Double.MIN_VALUE;
//List<String> countLog = new ArrayList<String>(200);

View File

@ -1627,4 +1627,82 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
Assert.assertEquals(result.getGenotype(expectedGenotype.getSampleName()).toString(), expectedGenotype.toString());
}
}
}
// Exercises GATKVariantContextUtils.indexOfAllele / indexOfAltAllele over every flag
// combination. NOTE(review): boolean args appear to be (ignoreRefState, considerRefAllele,
// useEquals) in that order, judging by the indexOfAltAllele delegation — confirm against
// the method signature.
@Test(dataProvider = "indexOfAlleleData")
public void testIndexOfAllele(final Allele reference, final List<Allele> altAlleles, final List<Allele> otherAlleles) {
// Assemble a context whose allele list is [reference, altAlleles...].
final List<Allele> alleles = new ArrayList<>(altAlleles.size() + 1);
alleles.add(reference);
alleles.addAll(altAlleles);
final VariantContext vc = makeVC("Source", alleles);
for (int i = 0; i < alleles.size(); i++) {
// Every contained allele must be found at its own index regardless of flag settings.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,true,false),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,true,false),i);
// A ref-flagged copy matches only when ref state is ignored and equals-based search is used.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i),true),true,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i),true),true,true,false),-1);
if (i == 0) {
// Index 0 is the reference allele: with considerRefAllele == false it must not be reported.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,false,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,false,false),-1);
// Fresh Allele instances built from the same bases: found only when the ref flag matches.
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i).getBases(),true),false,true,true),i);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i).getBases(),false),false,true,true),-1);
} else {
// Alt alleles are reported with 0-based alt indices (i - 1) by indexOfAltAllele.
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,alleles.get(i),true),i - 1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,alleles.get(i),false), i - 1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,Allele.create(alleles.get(i),true),true),i-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,Allele.create(alleles.get(i),true),false),-1);
}
}
// Alleles absent from the context must never be found, under any flag combination.
for (final Allele other : otherAlleles) {
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,true,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,true,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,true,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,true,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,false,true),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,false,false),-1);
Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,false,false),-1);
}
}
/**
 * Enumerates every subset of five alternative alleles via a 5-bit mask, yielding
 * { reference, includedAlts, excludedAlts } triplets for testIndexOfAllele.
 */
@DataProvider(name = "indexOfAlleleData")
public Iterator<Object[]> indexOfAlleleData() {
    final Allele[] ALTERNATIVE_ALLELES = new Allele[] { T, C, G, ATC, ATCATC};
    final int lastMask = 0x1F;
    // Build all mask combinations eagerly; same triplets in the same order as before.
    final List<Object[]> cases = new ArrayList<>(lastMask + 1);
    for (int mask = 0; mask <= lastMask; mask++) {
        final List<Allele> includedAlleles = new ArrayList<>(5);
        final List<Allele> excludedAlleles = new ArrayList<>(5);
        for (int bit = 0; bit < ALTERNATIVE_ALLELES.length; bit++)
            // Bit set -> allele participates in the variant context; clear -> it is a foreign allele.
            (((mask >> bit) & 1) == 1 ? includedAlleles : excludedAlleles).add(ALTERNATIVE_ALLELES[bit]);
        cases.add(new Object[] { Aref , includedAlleles, excludedAlleles});
    }
    return cases.iterator();
}
}