Merge pull request #482 from broadinstitute/vrr_reference_model_alt_allele
gVCF <NON_REF> in all vcf lines including variant ones when –ERC gVCF is...
This commit is contained in:
commit
83d07280ef
|
|
@ -422,14 +422,14 @@ public class UnifiedGenotyperEngine {
|
|||
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
||||
}
|
||||
|
||||
AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
|
||||
final AFCalcResult AFresult = afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
|
||||
|
||||
// is the most likely frequency conformation AC=0 for all alternate alleles?
|
||||
boolean bestGuessIsRef = true;
|
||||
|
||||
// determine which alternate alleles have AF>0
|
||||
final List<Allele> myAlleles = new ArrayList<Allele>(vc.getAlleles().size());
|
||||
final List<Integer> alleleCountsofMLE = new ArrayList<Integer>(vc.getAlleles().size());
|
||||
final List<Allele> myAlleles = new ArrayList<>(vc.getAlleles().size());
|
||||
final List<Integer> alleleCountsofMLE = new ArrayList<>(vc.getAlleles().size());
|
||||
myAlleles.add(vc.getReference());
|
||||
for ( int i = 0; i < AFresult.getAllelesUsedInGenotyping().size(); i++ ) {
|
||||
final Allele alternateAllele = AFresult.getAllelesUsedInGenotyping().get(i);
|
||||
|
|
@ -439,16 +439,13 @@ public class UnifiedGenotyperEngine {
|
|||
// Compute if the site is considered polymorphic with sufficient confidence relative to our
|
||||
// phred-scaled emission QUAL
|
||||
final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||
final boolean toInclude = isNonRef || alternateAllele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE ||
|
||||
UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
|
||||
UAC.annotateAllSitesWithPLs;
|
||||
|
||||
// if the most likely AC is not 0, then this is a good alternate allele to use
|
||||
if ( isNonRef ) {
|
||||
myAlleles.add(alternateAllele);
|
||||
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
|
||||
bestGuessIsRef = false;
|
||||
}
|
||||
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
||||
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
|
||||
UAC.annotateAllSitesWithPLs) {
|
||||
bestGuessIsRef &= !isNonRef;
|
||||
|
||||
if (toInclude) {
|
||||
myAlleles.add(alternateAllele);
|
||||
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
|
||||
}
|
||||
|
|
@ -513,29 +510,29 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the overall lod
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
List<Allele> allAllelesToUse = builder.make().getAlleles();
|
||||
final List<Allele> allAllelesToUse = builder.make().getAlleles();
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
|
||||
final VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
final AFCalcResult forwardAFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double forwardLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
|
||||
double forwardLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
|
||||
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
AFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
|
||||
final VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
final AFCalcResult reverseAFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double reverseLog10PofNull = AFresult.getLog10LikelihoodOfAFEq0();
|
||||
double reverseLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
|
||||
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
|
||||
final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
|
||||
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
|
||||
// strand score is max bias between forward and reverse strands
|
||||
|
|
|
|||
|
|
@ -48,7 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -67,10 +70,10 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
final int numChr = 2*numSamples;
|
||||
|
||||
// queue of AC conformations to process
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<>();
|
||||
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>(numChr+1);
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>(numChr+1);
|
||||
|
||||
// add AC=0 to the queue
|
||||
final int[] zeroCounts = new int[numAlternateAlleles];
|
||||
|
|
@ -84,7 +87,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
|
||||
calculateAlleleCountConformation(set, genotypeLikelihoods, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.getACcounts());
|
||||
|
|
@ -95,58 +98,28 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
return getResultFromFinalState(vc, log10AlleleFrequencyPriors);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected VariantContext reduceScope(final VariantContext vc) {
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles() ) {
|
||||
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL));
|
||||
return builder.make();
|
||||
} else {
|
||||
return vc;
|
||||
}
|
||||
@Override
|
||||
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
|
||||
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
|
||||
}
|
||||
|
||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
||||
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
|
||||
|
||||
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
|
||||
@Override
|
||||
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
|
||||
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), true);
|
||||
for ( final double[] likelihoods : GLs ) {
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
|
||||
final int alleleLikelihoodIndex1 = alleles.alleleIndex1 - 1;
|
||||
final int alleleLikelihoodIndex2 = alleles.alleleIndex2 - 1;
|
||||
if ( alleles.alleleIndex1 != 0 )
|
||||
likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
likelihoodSums[alleleLikelihoodIndex1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
// don't double-count it
|
||||
if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 )
|
||||
likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
likelihoodSums[alleleLikelihoodIndex2].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
}
|
||||
}
|
||||
|
||||
// sort them by probability mass and choose the best ones
|
||||
Collections.sort(Arrays.asList(likelihoodSums));
|
||||
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
|
||||
for ( int i = 0; i < numAllelesToChoose; i++ )
|
||||
bestAlleles.add(likelihoodSums[i].allele);
|
||||
|
||||
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
|
||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||
if ( bestAlleles.contains(allele) )
|
||||
orderedBestAlleles.add(allele);
|
||||
}
|
||||
|
||||
return orderedBestAlleles;
|
||||
}
|
||||
|
||||
private static final class DependentSet {
|
||||
|
|
@ -199,8 +172,8 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
if ( ACwiggle > 1 ) {
|
||||
final ArrayList<DependentSet> differentAlleles = new ArrayList<DependentSet>(numAltAlleles * numAltAlleles);
|
||||
final ArrayList<DependentSet> sameAlleles = new ArrayList<DependentSet>(numAltAlleles);
|
||||
final ArrayList<DependentSet> differentAlleles = new ArrayList<>(numAltAlleles * numAltAlleles);
|
||||
final ArrayList<DependentSet> sameAlleles = new ArrayList<>(numAltAlleles);
|
||||
|
||||
for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) {
|
||||
for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) {
|
||||
|
|
|
|||
|
|
@ -48,17 +48,50 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Uses the Exact calculation of Heng Li
|
||||
*/
|
||||
abstract class ExactAFCalc extends AFCalc {
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
/**
|
||||
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first.
|
||||
*/
|
||||
protected static final Comparator<LikelihoodSum> LIKELIHOOD_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
|
||||
|
||||
@Override
|
||||
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
|
||||
return - Double.compare(o1.sum,o2.sum);
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with higher likelihood are first but make sure that
|
||||
* NON_REF alleles are place are last.
|
||||
*/
|
||||
protected static final Comparator<LikelihoodSum> LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR = new Comparator<LikelihoodSum>() {
|
||||
@Override
|
||||
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
|
||||
if (o1.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)
|
||||
return 1;
|
||||
else if (o2.allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE)
|
||||
return -1;
|
||||
else
|
||||
return o1.compareTo(o2);
|
||||
}
|
||||
};
|
||||
/**
|
||||
* Sorts {@link org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactAFCalc.LikelihoodSum} instances where those with lower alternative allele index are first regardless of
|
||||
* the likelihood sum.
|
||||
*/
|
||||
protected static final Comparator<LikelihoodSum> LIKELIHOOD_INDEX_COMPARATOR = new Comparator<LikelihoodSum>() {
|
||||
@Override
|
||||
public int compare(final LikelihoodSum o1, final LikelihoodSum o2) {
|
||||
return Integer.compare(o1.index, o2.index);
|
||||
}
|
||||
};
|
||||
|
||||
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
|
||||
super(nSamples, maxAltAlleles, ploidy);
|
||||
|
|
@ -69,9 +102,10 @@ abstract class ExactAFCalc extends AFCalc {
|
|||
*/
|
||||
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||
public double sum = 0.0;
|
||||
public Allele allele;
|
||||
public final Allele allele;
|
||||
public final int index;
|
||||
|
||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||
public LikelihoodSum(final Allele allele, final int index) { this.allele = allele; this.index = index; }
|
||||
|
||||
public int compareTo(LikelihoodSum other) {
|
||||
final double diff = sum - other.sum;
|
||||
|
|
@ -85,12 +119,12 @@ abstract class ExactAFCalc extends AFCalc {
|
|||
* @return ArrayList of doubles corresponding to GL vectors
|
||||
*/
|
||||
protected static ArrayList<double[]> getGLs(final GenotypesContext GLs, final boolean includeDummy) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size() + 1);
|
||||
final ArrayList<double[]> genotypeLikelihoods = new ArrayList<>(GLs.size() + 1);
|
||||
|
||||
if ( includeDummy ) genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
final double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
|
|
@ -99,4 +133,108 @@ abstract class ExactAFCalc extends AFCalc {
|
|||
|
||||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected VariantContext reduceScope(final VariantContext vc) {
|
||||
// don't try to genotype too many alternate alleles
|
||||
final List<Allele> inputAltAlleles = vc.getAlternateAlleles();
|
||||
final List<Allele> outputAltAlleles = reduceScopeAlleles(vc,getMaxAltAlleles());
|
||||
|
||||
// only if output allele has reduced from the input alt allele set size we should care.
|
||||
final int altAlleleReduction = inputAltAlleles.size() - outputAltAlleles.size();
|
||||
|
||||
if (altAlleleReduction == 0)
|
||||
return vc;
|
||||
else if (altAlleleReduction != 0) {
|
||||
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles()
|
||||
+ " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart()
|
||||
+ " has " + (vc.getAlternateAlleles().size())
|
||||
+ " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
final List<Allele> alleles = new ArrayList<>(getMaxAltAlleles() + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(reduceScopeAlleles(vc, getMaxAltAlleles()));
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(reduceScopeGenotypes(vc, alleles));
|
||||
if (altAlleleReduction < 0)
|
||||
throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc + " " + builder.make());
|
||||
return builder.make();
|
||||
} else // if (altAlleleReduction < 0)
|
||||
throw new IllegalStateException("unexpected: reduction increased the number of alt. alleles!: " + - altAlleleReduction + " " + vc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a the new set of alleles to use.
|
||||
* @param vc target variant context.
|
||||
* @param numAllelesToChoose number of alleles to keep.
|
||||
* @return the list of alternative allele to keep.
|
||||
*/
|
||||
protected List<Allele> reduceScopeAlleles(final VariantContext vc, final int numAllelesToChoose) {
|
||||
|
||||
// Look for the <NON_REF> allele to exclude it from the pruning if present.
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
|
||||
final int nonRefAltAlleleIndex = GATKVariantContextUtils.indexOfAltAllele(vc,
|
||||
GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE, false);
|
||||
final boolean nonRefAltAllelePresent = nonRefAltAlleleIndex >= 0;
|
||||
|
||||
// <NON_REF> should not be considered in the downsizing, so we need to count it out when
|
||||
// considering if alt. allele downsizing is required.
|
||||
final int numProperOriginalAltAlleles = numOriginalAltAlleles - (nonRefAltAllelePresent ? 1 : 0);
|
||||
|
||||
// Avoid pointless allele reduction:
|
||||
if (numAllelesToChoose >= numProperOriginalAltAlleles)
|
||||
return vc.getAlternateAlleles();
|
||||
|
||||
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
||||
final Allele allele = vc.getAlternateAllele(i);
|
||||
likelihoodSums[i] = new LikelihoodSum(allele,i);
|
||||
}
|
||||
|
||||
// Calculate the allele likelihood sums.
|
||||
reduceScopeCalculateLikelihoodSums(vc, likelihoodSums);
|
||||
|
||||
// sort them by probability mass and choose the best ones
|
||||
// Make sure that the <NON_REF> allele is last if present.
|
||||
Collections.sort(Arrays.asList(likelihoodSums), nonRefAltAllelePresent ? LIKELIHOOD_NON_REF_THEN_SUM_COMPARATOR : LIKELIHOOD_SUM_COMPARATOR);
|
||||
|
||||
// We need to return the best likelihood alleles in the original alternative allele index order.
|
||||
// This heap will keep track of that index order.
|
||||
final PriorityQueue<LikelihoodSum> mostLikelyAllelesHeapByIndex = new PriorityQueue<>(numOriginalAltAlleles, LIKELIHOOD_INDEX_COMPARATOR);
|
||||
|
||||
for ( int i = 0; i < numAllelesToChoose; i++ )
|
||||
mostLikelyAllelesHeapByIndex.add(likelihoodSums[i]);
|
||||
|
||||
// guaranteed no to have been added at this point thanks for checking on whether reduction was
|
||||
// needed in the first place.
|
||||
if (nonRefAltAllelePresent)
|
||||
mostLikelyAllelesHeapByIndex.add(likelihoodSums[nonRefAltAlleleIndex]);
|
||||
|
||||
final ArrayList<Allele> orderedBestAlleles = new ArrayList<>(numAllelesToChoose);
|
||||
|
||||
while (!mostLikelyAllelesHeapByIndex.isEmpty())
|
||||
orderedBestAlleles.add(mostLikelyAllelesHeapByIndex.remove().allele);
|
||||
|
||||
return orderedBestAlleles;
|
||||
}
|
||||
|
||||
protected static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
|
||||
/**
|
||||
* Update the likelihood sums with using the variant context genotype likelihoods.
|
||||
* @param vc source variant context.
|
||||
* @param likelihoodSums where to update the likelihood sums.
|
||||
*/
|
||||
protected abstract void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums);
|
||||
|
||||
/**
|
||||
* Transforms the genotypes of the variant context according to the new subset of possible alleles.
|
||||
*
|
||||
* @param vc original variant-context.
|
||||
* @param allelesToUse possible alleles.
|
||||
* @return never {@code null}, the new set of genotype calls for the reduced scope.
|
||||
*/
|
||||
protected abstract GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse);
|
||||
}
|
||||
|
|
@ -59,7 +59,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
|
||||
|
||||
private final int ploidy;
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
private final static boolean VERBOSE = false;
|
||||
|
||||
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||
|
|
@ -68,22 +68,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected VariantContext reduceScope(VariantContext vc) {
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles()) {
|
||||
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles(), ploidy));
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetAlleles(vc, alleles, false, ploidy));
|
||||
return builder.make();
|
||||
} else {
|
||||
return vc;
|
||||
}
|
||||
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final List<Allele> allelesToUse) {
|
||||
return subsetAlleles(vc,allelesToUse,false,ploidy);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -105,8 +91,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
|
||||
public CombinedPoolLikelihoods() {
|
||||
// final int numElements = GenotypeLikelihoods.numLikelihoods();
|
||||
alleleCountSetList = new LinkedList<ExactACset>();
|
||||
conformationMap = new HashMap<ExactACcounts,ExactACset>();
|
||||
alleleCountSetList = new LinkedList<>();
|
||||
conformationMap = new HashMap<>();
|
||||
maxLikelihood = Double.NEGATIVE_INFINITY;
|
||||
}
|
||||
|
||||
|
|
@ -136,54 +122,22 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
public int getLength() {
|
||||
return alleleCountSetList.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Chooses N most likely alleles in a set of pools (samples) based on GL sum over alt alleles
|
||||
* @param vc Input variant context
|
||||
* @param numAllelesToChoose Number of alleles to choose
|
||||
* @param ploidy Ploidy per pool
|
||||
* @return list of numAllelesToChoose most likely alleles
|
||||
*/
|
||||
|
||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose, int ploidy) {
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
||||
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
|
||||
|
||||
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
|
||||
@Override
|
||||
protected void reduceScopeCalculateLikelihoodSums(final VariantContext vc, final LikelihoodSum[] likelihoodSums) {
|
||||
final int numOriginalAltAlleles = likelihoodSums.length;
|
||||
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes(), false);
|
||||
for ( final double[] likelihoods : GLs ) {
|
||||
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
|
||||
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
|
||||
for (int k=1; k < acCount.length;k++) {
|
||||
if (acCount[k] > 0)
|
||||
for (int k=1; k < acCount.length;k++)
|
||||
if (acCount[k] > 0 )
|
||||
likelihoodSums[k-1].sum += acCount[k] * (likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// sort them by probability mass and choose the best ones
|
||||
Collections.sort(Arrays.asList(likelihoodSums));
|
||||
final ArrayList<Allele> bestAlleles = new ArrayList<Allele>(numAllelesToChoose);
|
||||
for ( int i = 0; i < numAllelesToChoose; i++ )
|
||||
bestAlleles.add(likelihoodSums[i].allele);
|
||||
|
||||
final ArrayList<Allele> orderedBestAlleles = new ArrayList<Allele>(numAllelesToChoose);
|
||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||
if ( bestAlleles.contains(allele) )
|
||||
orderedBestAlleles.add(allele);
|
||||
}
|
||||
|
||||
return orderedBestAlleles;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Simple non-optimized version that combines GLs from several pools and produces global AF distribution.
|
||||
* @param GLs Inputs genotypes context with per-pool GLs
|
||||
|
|
@ -231,9 +185,9 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
int newGLPloidy,
|
||||
int numAlleles,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<ExactACset>();
|
||||
final LinkedList<ExactACset> ACqueue = new LinkedList<>();
|
||||
// mapping of ExactACset indexes to the objects
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<ExactACcounts, ExactACset>();
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset = new HashMap<>();
|
||||
final CombinedPoolLikelihoods newPool = new CombinedPoolLikelihoods();
|
||||
|
||||
// add AC=0 to the queue
|
||||
|
|
@ -251,7 +205,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
getStateTracker().incNEvaluations();
|
||||
// compute log10Likelihoods
|
||||
final ExactACset ACset = ACqueue.remove();
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
|
||||
|
||||
calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, ACqueue, indexesToACset);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(ACset.getACcounts());
|
||||
|
|
@ -514,7 +469,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
final int ploidy) {
|
||||
// the genotypes with PLs
|
||||
final GenotypesContext oldGTs = vc.getGenotypes();
|
||||
List<Allele> NO_CALL_ALLELES = new ArrayList<Allele>(ploidy);
|
||||
List<Allele> NO_CALL_ALLELES = new ArrayList<>(ploidy);
|
||||
|
||||
for (int k=0; k < ploidy; k++)
|
||||
NO_CALL_ALLELES.add(Allele.NO_CALL);
|
||||
|
|
@ -584,8 +539,6 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
* @param newLikelihoods the PL array
|
||||
* @param allelesToUse the list of alleles to choose from (corresponding to the PLs)
|
||||
* @param numChromosomes Number of chromosomes per pool
|
||||
*
|
||||
* @return genotype
|
||||
*/
|
||||
private void assignGenotype(final GenotypeBuilder gb,
|
||||
final double[] newLikelihoods,
|
||||
|
|
@ -599,8 +552,8 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
|
||||
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
|
||||
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
|
||||
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
final ArrayList<Double> alleleFreqs = new ArrayList<>();
|
||||
final ArrayList<Integer> alleleCounts = new ArrayList<>();
|
||||
|
||||
|
||||
for (int k=1; k < mlAlleleCount.length; k++) {
|
||||
|
|
@ -629,6 +582,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
}
|
||||
gb.alleles(myAlleles);
|
||||
|
||||
// TODO - deprecated so what is the appropriate method to call?
|
||||
if ( numNewAltAlleles > 0 )
|
||||
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ import java.util.*;
|
|||
* prior for the ith least likely allele.
|
||||
*/
|
||||
public class IndependentAllelesDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||
|
||||
/**
|
||||
* The min. confidence of an allele to be included in the joint posterior.
|
||||
*/
|
||||
|
|
@ -249,7 +250,7 @@ import java.util.*;
|
|||
final int nAlts = rootVC.getNAlleles() - 1;
|
||||
final List<Genotype> biallelicGenotypes = new ArrayList<Genotype>(rootVC.getNSamples());
|
||||
for ( final Genotype g : rootVC.getGenotypes() )
|
||||
biallelicGenotypes.add(combineGLs(g, altAlleleIndex, nAlts));
|
||||
biallelicGenotypes.add(combineGLsPrecise(g, altAlleleIndex, nAlts));
|
||||
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(rootVC);
|
||||
final Allele altAllele = rootVC.getAlternateAllele(altAlleleIndex - 1);
|
||||
|
|
@ -281,6 +282,7 @@ import java.util.*;
|
|||
*/
|
||||
@Requires({"original.hasLikelihoods()"}) // TODO -- add ploidy == 2 test "original.getPLs() == null || original.getPLs().length == 3"})
|
||||
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
|
||||
@Deprecated
|
||||
protected Genotype combineGLs(final Genotype original, final int altIndex, final int nAlts ) {
|
||||
if ( original.isNonInformative() )
|
||||
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
|
||||
|
|
@ -316,6 +318,75 @@ import java.util.*;
|
|||
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
|
||||
}
|
||||
|
||||
|
||||
private static final double PHRED_2_LOG10_COEFF = -.1;
|
||||
|
||||
/**
|
||||
* Returns a new Genotype with the PLs of the multi-allelic original reduced to a bi-allelic case.
|
||||
*
|
||||
* <p>Uses the log-sum-exp trick in order to work well with very low PLs</p>
|
||||
*
|
||||
* <p>This is handled in the following way:</p>
|
||||
*
|
||||
* <p>Suppose we have for a A/B/C site the following GLs:</p>
|
||||
*
|
||||
* <p>AA AB BB AC BC CC</p>
|
||||
*
|
||||
* <p>and we want to get the bi-allelic GLs for X/B, where X is everything not B</p>
|
||||
*
|
||||
* <p>XX = AA + AC + CC (since X = A or C)<br/>
|
||||
* XB = AB + BC <br/>
|
||||
* BB = BB <br/>
|
||||
* </p>
|
||||
* <p>
|
||||
* This implementation use the log sum trick in order to avoid numeric inestability.
|
||||
* </p>
|
||||
*
|
||||
* @param original the original multi-allelic genotype
|
||||
* @param altIndex the index of the alt allele we wish to keep in the bialleic case -- with ref == 0
|
||||
* @param nAlts the total number of alt alleles
|
||||
* @return a new biallelic genotype with appropriate PLs
|
||||
*/
|
||||
@Requires({"original.hasLikelihoods()"})
|
||||
@Ensures({"result.hasLikelihoods()", "result.getPL().length == 3"})
|
||||
protected Genotype combineGLsPrecise(final Genotype original, final int altIndex, final int nAlts ) {
|
||||
|
||||
if ( original.isNonInformative() )
|
||||
return new GenotypeBuilder(original).PL(BIALLELIC_NON_INFORMATIVE_PLS).alleles(BIALLELIC_NOCALL).make();
|
||||
|
||||
if ( altIndex < 1 || altIndex > nAlts ) throw new IllegalStateException("altIndex must be between 1 and nAlts " + nAlts);
|
||||
|
||||
final int[] pls = original.getPL();
|
||||
|
||||
final int nAlleles = nAlts + 1;
|
||||
|
||||
final int plCount = pls.length;
|
||||
|
||||
double BB = 0;
|
||||
final double[] XBvalues = new double[nAlleles - 1];
|
||||
final double[] XXvalues = new double[plCount - nAlleles];
|
||||
|
||||
int xbOffset = 0;
|
||||
int xxOffset = 0;
|
||||
for ( int index = 0; index < plCount; index++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(index);
|
||||
int i = pair.alleleIndex1;
|
||||
int j = pair.alleleIndex2;
|
||||
if (i == j) {
|
||||
if (i == altIndex) BB = PHRED_2_LOG10_COEFF * pls[index]; else XXvalues[xxOffset++] = PHRED_2_LOG10_COEFF * pls[index];
|
||||
} else if (i == altIndex || j == altIndex)
|
||||
XBvalues[xbOffset++] = PHRED_2_LOG10_COEFF * pls[index];
|
||||
else
|
||||
XXvalues[xxOffset++] = PHRED_2_LOG10_COEFF * pls[index];
|
||||
}
|
||||
|
||||
final double XB = MathUtils.log10sumLog10(XBvalues);
|
||||
final double XX = MathUtils.log10sumLog10(XXvalues);
|
||||
|
||||
final double[] GLs = new double[] { XX, XB, BB};
|
||||
return new GenotypeBuilder(original).PL(GLs).alleles(BIALLELIC_NOCALL).make();
|
||||
}
|
||||
|
||||
protected final List<AFCalcResult> applyMultiAllelicPriors(final List<AFCalcResult> conditionalPNonRefResults) {
|
||||
final ArrayList<AFCalcResult> sorted = new ArrayList<AFCalcResult>(conditionalPNonRefResults);
|
||||
|
||||
|
|
@ -340,7 +411,6 @@ import java.util.*;
|
|||
return sorted;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Take the independent estimates of pNonRef for each alt allele and combine them into a single result
|
||||
*
|
||||
|
|
|
|||
|
|
@ -57,8 +57,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.haplotype.EventMap;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
|
|
@ -136,7 +134,10 @@ public class GenotypingEngine {
|
|||
* @param activeRegionWindow Active window
|
||||
* @param genomeLocParser GenomeLocParser
|
||||
* @param activeAllelesToGenotype Alleles to genotype
|
||||
* @param addNonRef whether we should add a <NON_REF> alternative allele to the result variation contexts.
|
||||
*
|
||||
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
|
||||
*
|
||||
*/
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
@Ensures("result != null")
|
||||
|
|
@ -150,7 +151,8 @@ public class GenotypingEngine {
|
|||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final RefMetaDataTracker tracker,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
final List<VariantContext> activeAllelesToGenotype,
|
||||
final boolean addNonRef) {
|
||||
// sanity check input arguments
|
||||
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
||||
|
|
@ -183,16 +185,27 @@ public class GenotypingEngine {
|
|||
final List<String> priorityList = makePriorityList(eventsAtThisLoc);
|
||||
|
||||
// Merge the event to find a common reference representation
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
|
||||
VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
|
||||
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
// this is possible in GGA mode when the same event is represented in multiple input records
|
||||
throw new UserException("The same event (although possibly represented differently) is present in multiple input records at location " + loc + " and this is not something we can handle at this time. You will need to remove one of the records in order to proceed with your input file(s).");
|
||||
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
|
||||
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
|
||||
if (addNonRef) {
|
||||
final List<Allele> alleleList = new ArrayList<>();
|
||||
alleleList.addAll(mergedVC.getAlleles());
|
||||
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
vcb.alleles(alleleList);
|
||||
mergedVC = vcb.make();
|
||||
}
|
||||
|
||||
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
|
||||
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
||||
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
|
||||
for(int iii = 0; iii < eventsAtThisLoc.size(); iii++) {
|
||||
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
|
||||
}
|
||||
|
||||
|
|
@ -204,11 +217,14 @@ public class GenotypingEngine {
|
|||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() );
|
||||
|
||||
if (addNonRef) addMiscellaneousAllele(alleleReadMap);
|
||||
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
|
||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
|
||||
if( call != null ) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
||||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
|
||||
if (addNonRef) addMiscellaneousAllele(alleleReadMap_annotations);
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||
|
||||
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
|
||||
|
|
@ -218,16 +234,46 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
// maintain the set of all called haplotypes
|
||||
for ( final Allele calledAllele : call.getAlleles() )
|
||||
calledHaplotypes.addAll(alleleMapper.get(calledAllele));
|
||||
for ( final Allele calledAllele : call.getAlleles() ) {
|
||||
final List<Haplotype> haplotypeList = alleleMapper.get(calledAllele);
|
||||
if (haplotypeList == null) continue;
|
||||
calledHaplotypes.addAll(haplotypeList);
|
||||
}
|
||||
|
||||
returnCalls.add( annotatedCall );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new CalledHaplotypes(returnCalls, calledHaplotypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the <NON_REF> allele
|
||||
* @param stratifiedReadMap target per-read-allele-likelihood-map.
|
||||
*/
|
||||
public static Map<String, PerReadAlleleLikelihoodMap> addMiscellaneousAllele(final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap) {
|
||||
final Allele miscellanoeusAllele = GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE;
|
||||
for (Map.Entry<String, PerReadAlleleLikelihoodMap> perSample : stratifiedReadMap.entrySet()) {
|
||||
for (Map.Entry<GATKSAMRecord, Map<Allele, Double>> perRead : perSample.getValue().getLikelihoodReadMap().entrySet()) {
|
||||
double bestLikelihood = Double.NEGATIVE_INFINITY;
|
||||
double secondBestLikelihood = Double.NEGATIVE_INFINITY;
|
||||
for (Map.Entry<Allele,Double> perAllele : perRead.getValue().entrySet()) {
|
||||
final double value = perAllele.getValue();
|
||||
if (value > bestLikelihood) {
|
||||
secondBestLikelihood = bestLikelihood;
|
||||
bestLikelihood = value;
|
||||
} else if (value < bestLikelihood && value > secondBestLikelihood) {
|
||||
secondBestLikelihood = value;
|
||||
}
|
||||
}
|
||||
final double miscellanousLikelihood = Double.isInfinite(secondBestLikelihood) ? bestLikelihood : secondBestLikelihood;
|
||||
perSample.getValue().add(perRead.getKey(),miscellanoeusAllele,miscellanousLikelihood);
|
||||
}
|
||||
}
|
||||
return stratifiedReadMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Go through the haplotypes we assembled, and decompose them into their constituent variant contexts
|
||||
*
|
||||
|
|
|
|||
|
|
@ -564,6 +564,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
public void initialize() {
|
||||
super.initialize();
|
||||
|
||||
if (dontGenotype && emitReferenceConfidence == ReferenceConfidenceMode.GVCF)
|
||||
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
|
||||
|
||||
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
|
||||
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
|
||||
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
|
||||
}
|
||||
|
||||
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
|
||||
throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel);
|
||||
|
||||
|
|
@ -615,24 +624,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// where the filters are used. For example, in emitting all sites the lowQual field is used
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
|
||||
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
|
||||
if ( emitReferenceConfidence() ) {
|
||||
if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
|
||||
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
|
||||
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||
// a kluge to enforce the use of this indexing strategy
|
||||
if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
|
||||
getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
|
||||
throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
}
|
||||
|
||||
try {
|
||||
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
|
||||
} catch ( IllegalArgumentException e ) {
|
||||
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
initializeReferenceConfidenceModel(samples, headerInfo);
|
||||
|
||||
vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
|
||||
|
||||
|
|
@ -688,6 +680,28 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
getToolkit().getGenomeLocParser());
|
||||
}
|
||||
|
||||
private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
|
||||
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
|
||||
if ( emitReferenceConfidence() ) {
|
||||
if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
|
||||
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
|
||||
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||
// a kluge to enforce the use of this indexing strategy
|
||||
if (getToolkit().getArguments().variant_index_type != OPTIMAL_GVCF_INDEX_TYPE ||
|
||||
getToolkit().getArguments().variant_index_parameter != OPTIMAL_GVCF_INDEX_PARAMETER) {
|
||||
throw new UserException.GVCFIndexException(OPTIMAL_GVCF_INDEX_TYPE, OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
}
|
||||
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = 0.0;
|
||||
|
||||
try {
|
||||
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
|
||||
} catch ( IllegalArgumentException e ) {
|
||||
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates the appropriate likelihood calculation engine.
|
||||
*
|
||||
|
|
@ -833,10 +847,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap =
|
||||
likelihoodCalculationEngine.computeReadLikelihoods(assemblyResult,reads);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Note: we used to subset down at this point to only the "best" haplotypes in all samples for genotyping, but there
|
||||
// was a bad interaction between that selection and the marginalization that happens over each event when computing
|
||||
// GLs. In particular, for samples that are heterozygous non-reference (B/C) the marginalization for B treats the
|
||||
|
|
@ -852,7 +862,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
regionForGenotyping.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
metaDataTracker,
|
||||
activeAllelesToGenotype );
|
||||
activeAllelesToGenotype, emitReferenceConfidence() );
|
||||
|
||||
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
||||
if ( bamWriter != null ) {
|
||||
|
|
@ -866,8 +876,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
if( DEBUG ) { logger.info("----------------------------------------------------------------------------------"); }
|
||||
|
||||
|
||||
if ( emitReferenceConfidence() ) {
|
||||
if ( calledHaplotypes.getCalls().isEmpty() ) {
|
||||
if ( !containsCalls(calledHaplotypes) ) {
|
||||
// no called all of the potential haplotypes
|
||||
return referenceModelForNoVariation(originalActiveRegion, false);
|
||||
} else
|
||||
|
|
@ -879,6 +890,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
}
|
||||
|
||||
private boolean containsCalls(final GenotypingEngine.CalledHaplotypes calledHaplotypes) {
|
||||
final List<VariantContext> calls = calledHaplotypes.getCalls();
|
||||
if (calls.isEmpty()) return false;
|
||||
for (final VariantContext call : calls)
|
||||
for (final Genotype genotype : call.getGenotypes())
|
||||
if (genotype.isCalled())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* High-level function that runs the assembler on the active region reads,
|
||||
* returning a data structure with the resulting information needed
|
||||
|
|
|
|||
|
|
@ -48,7 +48,10 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
|
|
@ -63,9 +66,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFSimpleHeaderLine;
|
||||
|
||||
import java.io.File;
|
||||
|
|
|
|||
|
|
@ -46,8 +46,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.gvcf;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.ReferenceConfidenceModel;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
|
||||
|
|
@ -56,7 +54,10 @@ import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
|||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Genome-wide VCF writer
|
||||
|
|
@ -287,7 +288,7 @@ public class GVCFWriter implements VariantContextWriter {
|
|||
}
|
||||
|
||||
final Genotype g = vc.getGenotype(0);
|
||||
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) ) {
|
||||
if ( g.isHomRef() && vc.hasAlternateAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE) && vc.isBiallelic() ) {
|
||||
// create bands
|
||||
final VariantContext maybeCompletedBand = addHomRefSite(vc, g);
|
||||
if ( maybeCompletedBand != null ) underlyingWriter.add(maybeCompletedBand);
|
||||
|
|
|
|||
|
|
@ -118,7 +118,6 @@ final class HomRefBlock {
|
|||
if ( g == null ) throw new IllegalArgumentException("g cannot be null");
|
||||
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
|
||||
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
|
||||
if ( ! g.hasDP() ) throw new IllegalArgumentException("g must have DP field");
|
||||
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
|
||||
|
||||
if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
|
||||
|
|
@ -136,7 +135,7 @@ final class HomRefBlock {
|
|||
}
|
||||
stop = pos;
|
||||
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
|
||||
DPs.add(g.getDP());
|
||||
DPs.add(Math.max(g.getDP(),0));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("e7cb959912ea964bf9c897904aa5220b"));
|
||||
Arrays.asList("f5a62ecb8d32f6161b2ac7682c9f711d"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("f29b3fa9d5642297cfc4b10aa2137c68"));
|
||||
Arrays.asList("8897652c7516a91d22bc678f2189131e"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,14 +106,22 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TestCombineGLs")
|
||||
private void testCombineGLs(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
|
||||
public void testCombineGLsPrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
|
||||
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
||||
final Genotype combined = calc.combineGLs(testg, altIndex, nAlts);
|
||||
final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
|
||||
|
||||
Assert.assertEquals(combined.getPL(), expected.getPL(),
|
||||
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TestCombineGLs")
|
||||
public void testCombinePrecise(final int altIndex, final int nAlts, final Genotype testg, final Genotype expected) {
|
||||
final IndependentAllelesDiploidExactAFCalc calc = (IndependentAllelesDiploidExactAFCalc)AFCalcFactory.createAFCalc(AFCalcFactory.Calculation.EXACT_INDEPENDENT, 1, 4);
|
||||
final Genotype combined = calc.combineGLsPrecise(testg, altIndex, nAlts);
|
||||
|
||||
Assert.assertEquals(combined.getPL(), expected.getPL(),
|
||||
"Combined PLs " + Utils.join(",", combined.getPL()) + " != expected " + Utils.join(",", expected.getPL()));
|
||||
}
|
||||
|
||||
static Allele A = Allele.create("A", true);
|
||||
static Allele C = Allele.create("C");
|
||||
|
|
|
|||
|
|
@ -57,9 +57,13 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -279,6 +283,60 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertTrue(compareVCMaps(calculatedMap, expectedMap));
|
||||
}
|
||||
|
||||
@Test(dataProvider="AddMiscellaneousDataProvider", enabled=false)
|
||||
public void testAddMiscellaneousAllele(final String readBases, final int readOffset,
|
||||
final String ref, final int refOffset,
|
||||
final String referenceAllele, final String[] alternatives, final double[] likelihoods, final double[] expected) {
|
||||
final byte baseQual = (byte)30;
|
||||
|
||||
final byte[] baseQuals = Utils.dupBytes(baseQual, readBases.length());
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(readBases.getBytes(), baseQuals, readBases.length() + "M");
|
||||
final GenomeLoc loc = new UnvalidatingGenomeLoc("20",0,refOffset,refOffset);
|
||||
final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc,Collections.singletonList(read),readOffset);
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder();
|
||||
final GenotypeBuilder gb = new GenotypeBuilder();
|
||||
final List<String> alleleStrings = new ArrayList<>( 1 + alternatives.length);
|
||||
alleleStrings.add(referenceAllele);
|
||||
alleleStrings.addAll(Arrays.asList(alternatives));
|
||||
|
||||
gb.AD(new int[] { 1 });
|
||||
gb.DP(1);
|
||||
gb.PL(likelihoods);
|
||||
|
||||
vcb.alleles(alleleStrings);
|
||||
vcb.loc("20",refOffset,refOffset + referenceAllele.length() -1);
|
||||
|
||||
vcb.genotypes(gb.make());
|
||||
|
||||
final VariantContext vc = vcb.make();
|
||||
|
||||
final VariantContext updatedVc = null; // GenotypingEngine.addMiscellaneousAllele(vc,pileup,ref.getBytes(),0);
|
||||
final GenotypeLikelihoods updatedLikelihoods = updatedVc.getGenotype(0).getLikelihoods();
|
||||
Assert.assertEquals(updatedLikelihoods.getAsVector().length, expected.length);
|
||||
final double[] updatedLikelihoodsArray = updatedVc.getGenotype(0).getLikelihoods().getAsVector();
|
||||
for (int i = 0; i < updatedLikelihoodsArray.length; i++) {
|
||||
Assert.assertEquals(updatedLikelihoodsArray[i],expected[i],0.0001);
|
||||
}
|
||||
Allele altAllele = null;
|
||||
for (final Allele allele : updatedVc.getAlleles())
|
||||
if (allele.isSymbolic() && allele.getBaseString().equals(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE_NAME))
|
||||
altAllele = allele;
|
||||
Assert.assertNotNull(altAllele);
|
||||
}
|
||||
|
||||
@DataProvider(name="AddMiscellaneousDataProvider")
|
||||
public Iterator<Object[]> addMiscellaneousAlleleDataProvider() {
|
||||
return Arrays.asList(ADD_MISCELLANEOUS_ALLELE_DATA).iterator();
|
||||
}
|
||||
|
||||
private static final double MATCH_LnLK = QualityUtils.qualToProbLog10((byte)30);
|
||||
private static final double MISS_LnLK = QualityUtils.qualToErrorProbLog10((byte)30);
|
||||
|
||||
private static final Object[][] ADD_MISCELLANEOUS_ALLELE_DATA = new Object[][] {
|
||||
new Object[] {"ACTG", 0,"ACTGTGAGTATTCC",0,"A",new String[]{}, new double[] {MATCH_LnLK * MATCH_LnLK}, 6,
|
||||
new double[] {MATCH_LnLK * MATCH_LnLK,MATCH_LnLK * MISS_LnLK, MISS_LnLK * MISS_LnLK}}
|
||||
};
|
||||
|
||||
/**
|
||||
* Private function to compare Map of VCs, it only checks the types and start locations of the VariantContext
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -66,11 +66,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4bb8e44b2d04757f8b11ca6400828357"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "8ddc291584f56e27d125b6a0523f2703"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "1dfe1fc8079938adf1565450671094d4"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d98245380500a6decfc26dcaadb2c4d2"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "e53e164cc3f5cbd5fba083f2cdb98a88"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "a258dbbfabe88dad11d57151cd256831"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "c4c28e74eda133f99e0864ad16c965c4"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "e4d36f165b2ddbb923d3c9a402e96f1b"});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,10 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
|
|
@ -61,10 +58,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
|
|
|
|||
|
|
@ -492,13 +492,62 @@ public class MathUtils {
|
|||
return result;
|
||||
}
|
||||
|
||||
private static final double LOG1MEXP_THRESHOLD = Math.log(0.5);
|
||||
|
||||
private static final double LN_10 = Math.log(10);
|
||||
|
||||
/**
|
||||
* Calculates {@code log(1-exp(a))} without loosing precision.
|
||||
*
|
||||
* <p>
|
||||
* This is based on the approach described in:
|
||||
*
|
||||
* </p>
|
||||
* <p>
|
||||
* Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012 <br/>
|
||||
* <a ref="http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf">Online document</a>.
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
* @param a the input exponent.
|
||||
* @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
|
||||
*/
|
||||
public static double log1mexp(final double a) {
|
||||
if (a > 0) return Double.NaN;
|
||||
if (a == 0) return Double.NEGATIVE_INFINITY;
|
||||
|
||||
return (a < LOG1MEXP_THRESHOLD) ? Math.log1p(-Math.exp(a)) : Math.log(-Math.expm1(a));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates {@code log10(1-10^a)} without loosing precision.
|
||||
*
|
||||
* <p>
|
||||
* This is based on the approach described in:
|
||||
*
|
||||
* </p>
|
||||
* <p>
|
||||
* Maechler M, Accurately Computing log(1-exp(-|a|)) Assessed by the Rmpfr package, 2012 <br/>
|
||||
* <a ref="http://cran.r-project.org/web/packages/Rmpfr/vignettes/log1mexp-note.pdf">Online document</a>.
|
||||
* </p>
|
||||
*
|
||||
* @param a the input exponent.
|
||||
* @return {@link Double#NaN NaN} if {@code a > 0}, otherwise the corresponding value.
|
||||
*/
|
||||
public static double log10OneMinusPow10(final double a) {
|
||||
if (a > 0) return Double.NaN;
|
||||
if (a == 0) return Double.NEGATIVE_INFINITY;
|
||||
final double b = a * LN_10;
|
||||
return log1mexp(b) / LN_10;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the log10 of the multinomial coefficient. Designed to prevent
|
||||
* overflows even with very large numbers.
|
||||
*
|
||||
* @param n total number of trials
|
||||
* @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km)
|
||||
* @return
|
||||
* @return the log10 of the multinomial coefficient of {@code n} over the groupings in {@code k}.
|
||||
*/
|
||||
public static double log10MultinomialCoefficient(final int n, final int[] k) {
|
||||
if ( n < 0 )
|
||||
|
|
|
|||
|
|
@ -2115,4 +2115,79 @@ public class GATKVariantContextUtils {
|
|||
if ( ! vc1.getAlleles().equals(vc2.getAlleles()) ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the absolute 0-based index of an allele.
|
||||
*
|
||||
* <p/>
|
||||
* If the allele is equal to the reference, the result is 0, if it equal to the first alternative the result is 1
|
||||
* and so forth.
|
||||
* <p/>
|
||||
* Therefore if you want the 0-based index within the alternative alleles you need to do the following:
|
||||
*
|
||||
* <p/>
|
||||
* You can indicate whether the Java object reference comparator {@code ==} can be safelly used by setting {@code useEquals} to {@code false}.
|
||||
*
|
||||
* @param vc the target variant context.
|
||||
* @param allele the target allele.
|
||||
* @param ignoreRefState whether the reference states of the allele is important at all. Has no effect if {@code useEquals} is {@code false}.
|
||||
* @param considerRefAllele whether the reference allele should be considered. You should set it to {@code false} if you are only interested in alternative alleles.
|
||||
* @param useEquals whether equal method should be used in the search: {@link Allele#equals(Allele,boolean)}.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code allele} is {@code null}.
|
||||
* @return {@code -1} if there is no such allele that satify those criteria, a value between 0 and {@link VariantContext#getNAlleles()} {@code -1} otherwise.
|
||||
*/
|
||||
public static int indexOfAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState, final boolean considerRefAllele, final boolean useEquals) {
|
||||
if (allele == null) throw new IllegalArgumentException();
|
||||
return useEquals ? indexOfEqualAllele(vc,allele,ignoreRefState,considerRefAllele) : indexOfSameAllele(vc,allele,considerRefAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the relative 0-based index of an alternative allele.
|
||||
* <p/>
|
||||
* The the query allele is the same as the first alternative allele, the result is 0,
|
||||
* if it is equal to the second 1 and so forth.
|
||||
*
|
||||
*
|
||||
* <p/>
|
||||
* Notice that the ref-status of the query {@code allele} is ignored.
|
||||
*
|
||||
* @param vc the target variant context.
|
||||
* @param allele the query allele.
|
||||
* @param useEquals whether equal method should be used in the search: {@link Allele#equals(Allele,boolean)}.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code allele} is {@code null}.
|
||||
*
|
||||
* @return {@code -1} if there is no such allele that satify those criteria, a value between 0 and the number
|
||||
* of alternative alleles - 1.
|
||||
*/
|
||||
public static int indexOfAltAllele(final VariantContext vc, final Allele allele, final boolean useEquals) {
|
||||
final int absoluteIndex = indexOfAllele(vc,allele,true,false,useEquals);
|
||||
return absoluteIndex == -1 ? -1 : absoluteIndex - 1;
|
||||
}
|
||||
|
||||
// Impements index search using equals.
|
||||
private static int indexOfEqualAllele(final VariantContext vc, final Allele allele, final boolean ignoreRefState,
|
||||
final boolean considerRefAllele) {
|
||||
int i = 0;
|
||||
for (final Allele a : vc.getAlleles())
|
||||
if (a.equals(allele,ignoreRefState))
|
||||
return i == 0 ? (considerRefAllele ? 0 : -1) : i;
|
||||
else
|
||||
i++;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Implements index search using ==.
|
||||
private static int indexOfSameAllele(final VariantContext vc, final Allele allele, final boolean considerRefAllele) {
|
||||
int i = 0;
|
||||
|
||||
for (final Allele a : vc.getAlleles())
|
||||
if (a == allele)
|
||||
return i == 0 ? (considerRefAllele ? 0 : -1) : i;
|
||||
else
|
||||
i++;
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,18 +39,19 @@ import java.util.*;
|
|||
* Basic unit test for MathUtils
|
||||
*/
|
||||
public class MathUtilsUnitTest extends BaseTest {
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that we get unqiue values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}.
|
||||
* Tests that we get unique values for the valid (non-null-producing) input space for {@link MathUtils#fastGenerateUniqueHashFromThreeIntegers(int, int, int)}.
|
||||
*/
|
||||
@Test
|
||||
public void testGenerateUniqueHashFromThreePositiveIntegers() {
|
||||
logger.warn("Executing testGenerateUniqueHashFromThreePositiveIntegers");
|
||||
|
||||
final Set<Long> observedLongs = new HashSet<Long>();
|
||||
final Set<Long> observedLongs = new HashSet<>();
|
||||
for (short i = 0; i < Byte.MAX_VALUE; i++) {
|
||||
for (short j = 0; j < Byte.MAX_VALUE; j++) {
|
||||
for (short k = 0; k < Byte.MAX_VALUE; k++) {
|
||||
|
|
@ -72,6 +73,80 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "log10OneMinusPow10Data")
|
||||
public void testLog10OneMinusPow10(final double x, final double expected) {
|
||||
final double actual = MathUtils.log10OneMinusPow10(x);
|
||||
if (Double.isNaN(expected))
|
||||
Assert.assertTrue(Double.isNaN(actual));
|
||||
else
|
||||
Assert.assertEquals(actual,expected,1E-9);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "log1mexpData")
|
||||
public void testLog1mexp(final double x, final double expected) {
|
||||
final double actual = MathUtils.log1mexp(x);
|
||||
if (Double.isNaN(expected))
|
||||
Assert.assertTrue(Double.isNaN(actual));
|
||||
else
|
||||
Assert.assertEquals(actual,expected,1E-9);
|
||||
}
|
||||
|
||||
@DataProvider(name = "log10OneMinusPow10Data")
|
||||
public Iterator<Object[]> log10OneMinusPow10Data() {
|
||||
|
||||
final double[] inValues = new double[] { Double.NaN, 10, 1, 0, -1, -3, -10, -30, -100, -300, -1000, -3000 };
|
||||
return new Iterator<Object[]>() {
|
||||
|
||||
private int i = 0;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return i < inValues.length;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] next() {
|
||||
final double input = inValues[i++];
|
||||
final double output = Math.log10( 1 - Math.pow(10,input));
|
||||
return new Object[] { input, output };
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@DataProvider(name = "log1mexpData")
|
||||
public Iterator<Object[]> log1mexpData() {
|
||||
|
||||
final double[] inValues = new double[] { Double.NaN, 10, 1, 0, -1, -3, -10, -30, -100, -300, -1000, -3000 };
|
||||
return new Iterator<Object[]>() {
|
||||
|
||||
private int i = 0;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return i < inValues.length;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] next() {
|
||||
final double input = inValues[i++];
|
||||
final double output = Math.log( 1 - Math.exp(input));
|
||||
return new Object[] { input, output };
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that we get the right values from the binomial distribution
|
||||
*/
|
||||
|
|
@ -144,7 +219,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
Assert.assertTrue(MathUtils.sampleIndicesWithReplacement(5, 1000).size() == 1000);
|
||||
|
||||
// Check that the list contains only the k element range that as asked for - no more, no less
|
||||
List<Integer> Five = new ArrayList<Integer>();
|
||||
List<Integer> Five = new ArrayList<>();
|
||||
Collections.addAll(Five, 0, 1, 2, 3, 4);
|
||||
List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000);
|
||||
Assert.assertTrue(BigFive.containsAll(Five));
|
||||
|
|
@ -160,9 +235,9 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
|
||||
// Check that the list contains only the k element range that as asked for - no more, no less but now
|
||||
// use the index list to pull elements from another list using sliceListByIndices
|
||||
List<Integer> Five = new ArrayList<Integer>();
|
||||
List<Integer> Five = new ArrayList<>();
|
||||
Collections.addAll(Five, 0, 1, 2, 3, 4);
|
||||
List<Character> FiveAlpha = new ArrayList<Character>();
|
||||
List<Character> FiveAlpha = new ArrayList<>();
|
||||
Collections.addAll(FiveAlpha, 'a', 'b', 'c', 'd', 'e');
|
||||
List<Integer> BigFive = MathUtils.sampleIndicesWithReplacement(5, 10000);
|
||||
List<Character> BigFiveAlpha = MathUtils.sliceListByIndices(BigFive, FiveAlpha);
|
||||
|
|
@ -180,8 +255,8 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
int[] numbers = {1, 2, 4, 5, 3, 128, 25678, -24};
|
||||
MathUtils.RunningAverage r = new MathUtils.RunningAverage();
|
||||
|
||||
for (int i = 0; i < numbers.length; i++)
|
||||
r.add((double) numbers[i]);
|
||||
for (final double b : numbers)
|
||||
r.add(b);
|
||||
|
||||
Assert.assertEquals((long) numbers.length, r.observationCount());
|
||||
Assert.assertTrue(r.mean() - 3224.625 < 2e-10);
|
||||
|
|
@ -253,24 +328,6 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Private functions used by testArrayShuffle()
|
||||
*/
|
||||
private boolean hasUniqueElements(Object[] x) {
|
||||
for (int i = 0; i < x.length; i++)
|
||||
for (int j = i + 1; j < x.length; j++)
|
||||
if (x[i].equals(x[j]) || x[i] == x[j])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean hasAllElements(final Object[] expected, final Object[] actual) {
|
||||
HashSet<Object> set = new HashSet<Object>();
|
||||
set.addAll(Arrays.asList(expected));
|
||||
set.removeAll(Arrays.asList(actual));
|
||||
return set.isEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testApproximateLog10SumLog10() {
|
||||
|
||||
|
|
@ -451,7 +508,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "ArrayMinData")
|
||||
public Object[][] makeArrayMinData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{Arrays.asList(10), 10});
|
||||
|
|
@ -554,9 +611,11 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
// generate the partitions of an integer, each partition sorted numerically
|
||||
int n;
|
||||
List<Integer> a;
|
||||
|
||||
int y;
|
||||
int k;
|
||||
int state;
|
||||
|
||||
int x;
|
||||
int l;
|
||||
|
||||
|
|
@ -564,7 +623,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
this.n = n;
|
||||
this.y = n - 1;
|
||||
this.k = 1;
|
||||
this.a = new ArrayList<Integer>();
|
||||
this.a = new ArrayList<>();
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
this.a.add(i);
|
||||
}
|
||||
|
|
@ -588,9 +647,9 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
if ( this.state == 1 ) {
|
||||
while ( 2*x <= y ) {
|
||||
while ( 2 * x <= y ) {
|
||||
this.a.set(k,x);
|
||||
this.y -= x;
|
||||
this.y -= (int) x;
|
||||
this.k++;
|
||||
}
|
||||
this.l = 1+this.k;
|
||||
|
|
@ -620,7 +679,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
final StringBuilder buf = new StringBuilder();
|
||||
buf.append("{ ");
|
||||
while ( hasNext() ) {
|
||||
buf.append("[");
|
||||
|
|
@ -671,12 +730,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private int[] clone(int[] arr) {
|
||||
int[] a = new int[arr.length];
|
||||
for ( int idx = 0; idx < a.length ; idx ++) {
|
||||
a[idx] = arr[idx];
|
||||
}
|
||||
|
||||
return a;
|
||||
return Arrays.copyOf(arr, arr.length);
|
||||
}
|
||||
|
||||
private int[] nextFromPartitioner() {
|
||||
|
|
@ -834,7 +888,7 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
for ( double[] alleles : testAlleles ) {
|
||||
for ( int count : numAlleleSampled ) {
|
||||
// test that everything sums to one. Generate all multinomial draws
|
||||
List<Double> likelihoods = new ArrayList<Double>(100000);
|
||||
List<Double> likelihoods = new ArrayList<>(100000);
|
||||
NextCounts generator = new NextCounts(alleles.length,count);
|
||||
double maxLog = Double.MIN_VALUE;
|
||||
//List<String> countLog = new ArrayList<String>(200);
|
||||
|
|
|
|||
|
|
@ -1627,4 +1627,82 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.getGenotype(expectedGenotype.getSampleName()).toString(), expectedGenotype.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that indexOfAllele/indexOfAltAllele locate every allele present in a
// variant context (and only those) across the combinations of the
// ignoreRefState / considerRefAllele / useEquals flags.
@Test(dataProvider = "indexOfAlleleData")
public void testIndexOfAllele(final Allele reference, final List<Allele> altAlleles, final List<Allele> otherAlleles) {
    // Build a VC whose allele list is [reference, alt1, alt2, ...].
    final List<Allele> alleles = new ArrayList<>(altAlleles.size() + 1);
    alleles.add(reference);
    alleles.addAll(altAlleles);
    final VariantContext vc = makeVC("Source", alleles);

    for (int i = 0; i < alleles.size(); i++) {
        // An allele object taken straight from the VC is found at its own index
        // for every flag combination that considers the reference allele.
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,true,true),i);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,true,true),i);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,true,false),i);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,true,false),i);
        // A fresh copy (with ref-status forced to true) matches under equals when
        // ref-state is ignored, but never under the identity (==) search.
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i),true),true,true,true),i);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i),true),true,true,false),-1);
        if (i == 0) {
            // i == 0 is the reference allele: it must be reported as absent
            // whenever considerRefAllele == false.
            Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,false,true),-1);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,false,true),-1);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),true,false,false),-1);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,alleles.get(i),false,false,false),-1);
            // With ignoreRefState == false only the copy with matching ref-status is found.
            Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i).getBases(),true),false,true,true),i);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,Allele.create(alleles.get(i).getBases(),false),false,true,true),-1);
        } else {
            // Alternative alleles: the alt-relative index is the absolute index minus 1.
            Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,alleles.get(i),true),i - 1);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,alleles.get(i),false), i - 1);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,Allele.create(alleles.get(i),true),true),i-1);
            Assert.assertEquals(GATKVariantContextUtils.indexOfAltAllele(vc,Allele.create(alleles.get(i),true),false),-1);
        }
    }

    // Alleles not present in the VC must never be found, whatever the flags.
    for (final Allele other : otherAlleles) {
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,true,true),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,true,true),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,true,false),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,true,false),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,false,true),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,false,true),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,true,false,false),-1);
        Assert.assertEquals(GATKVariantContextUtils.indexOfAllele(vc,other,false,false,false),-1);
    }
}
|
||||
|
||||
@DataProvider(name = "indexOfAlleleData")
|
||||
public Iterator<Object[]> indexOfAlleleData() {
|
||||
|
||||
final Allele[] ALTERNATIVE_ALLELES = new Allele[] { T, C, G, ATC, ATCATC};
|
||||
|
||||
final int lastMask = 0x1F;
|
||||
|
||||
return new Iterator<Object[]>() {
|
||||
|
||||
int nextMask = 0;
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return nextMask <= lastMask;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] next() {
|
||||
|
||||
int mask = nextMask++;
|
||||
final List<Allele> includedAlleles = new ArrayList<>(5);
|
||||
final List<Allele> excludedAlleles = new ArrayList<>(5);
|
||||
for (int i = 0; i < ALTERNATIVE_ALLELES.length; i++) {
|
||||
((mask & 1) == 1 ? includedAlleles : excludedAlleles).add(ALTERNATIVE_ALLELES[i]);
|
||||
mask >>= 1;
|
||||
}
|
||||
return new Object[] { Aref , includedAlleles, excludedAlleles};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue