Merged bug fix from Stable into Unstable
This commit is contained in:
commit
be9e48ba29
|
|
@ -42,7 +42,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
|
|||
if (!vc.isBiallelic()) {
|
||||
// for non-bliallelic case, do test with most common alt allele.
|
||||
// Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB.
|
||||
int[] idxVector = vc.getGLIndecesOfAllele(vc.getAltAlleleWithHighestAlleleCount());
|
||||
int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount());
|
||||
idxAA = idxVector[0];
|
||||
idxAB = idxVector[1];
|
||||
idxBB = idxVector[2];
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ public enum DiploidGenotype {
|
|||
return r != base2;
|
||||
else
|
||||
return base2 == r;
|
||||
//return MathUtils.countOccurrences(r, this.toString()) == 1;
|
||||
}
|
||||
|
||||
public boolean isHom() {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -61,7 +60,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
|
||||
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result, false);
|
||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
|
@ -85,21 +84,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
||||
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
|
||||
|
||||
// make sure that we've cached enough data
|
||||
if ( numOriginalAltAlleles > UnifiedGenotyperEngine.PLIndexToAlleleIndex.length - 1 )
|
||||
UnifiedGenotyperEngine.calculatePLcache(numOriginalAltAlleles);
|
||||
|
||||
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
|
||||
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes());
|
||||
for ( final double[] likelihoods : GLs ) {
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
|
||||
int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numOriginalAltAlleles][PLindexOfBestGL];
|
||||
if ( alleles[0] != 0 )
|
||||
likelihoodSums[alleles[0]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
|
||||
if ( alleles.alleleIndex1 != 0 )
|
||||
likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
// don't double-count it
|
||||
if ( alleles[1] != 0 && alleles[1] != alleles[0] )
|
||||
likelihoodSums[alleles[1]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 )
|
||||
likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -189,24 +184,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
// mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column;
|
||||
// for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB.
|
||||
final HashMap<ExactACcounts, Integer> ACsetIndexToPLIndex = new HashMap<ExactACcounts, Integer>();
|
||||
|
||||
// to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them
|
||||
final ArrayList<ExactACcounts> dependentACsetsToDelete = new ArrayList<ExactACcounts>();
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
int sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
|
@ -215,15 +207,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO -- remove me
|
||||
public static void linearExactMultiAllelic(final GenotypesContext GLs,
|
||||
final int numAlternateAlleles,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final boolean preserveData) {
|
||||
final boolean foo) {
|
||||
linearExactMultiAllelic(GLs, numAlternateAlleles, log10AlleleFrequencyPriors, result);
|
||||
}
|
||||
|
||||
// make sure the PL cache has been initialized
|
||||
if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null )
|
||||
UnifiedGenotyperEngine.calculatePLcache(5);
|
||||
|
||||
|
||||
public static void linearExactMultiAllelic(final GenotypesContext GLs,
|
||||
final int numAlternateAlleles,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
|
|
@ -241,21 +239,20 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// optimization: create the temporary storage for computing L(j,k) just once
|
||||
final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
|
||||
final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
|
||||
for ( int i = 0; i < maxPossibleDependencies; i++ )
|
||||
tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -273,27 +270,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double maxLog10L,
|
||||
final int numChr,
|
||||
final boolean preserveData,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final double[][] tempLog10ConformationLikelihoods) {
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
||||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
|
||||
|
||||
// clean up memory
|
||||
if ( !preserveData ) {
|
||||
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
|
||||
indexesToACset.remove(index);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
|
||||
}
|
||||
}
|
||||
computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, result);
|
||||
|
||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
|
|
@ -301,11 +287,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
|
||||
// no reason to keep this data around because nothing depends on it
|
||||
if ( !preserveData )
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
|
|
@ -316,15 +297,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
final int numAltAlleles = set.ACcounts.getCounts().length;
|
||||
|
||||
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
|
||||
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
|
||||
|
||||
// add conformations for the k+1 case
|
||||
int PLindex = 0;
|
||||
for ( int allele = 0; allele < numAltAlleles; allele++ ) {
|
||||
final int[] ACcountsClone = set.ACcounts.getCounts().clone();
|
||||
ACcountsClone[allele]++;
|
||||
updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset);
|
||||
// to get to this conformation, a sample would need to be AB (remember that ref=0)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
|
||||
updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
|
||||
|
|
@ -338,71 +317,51 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
ACcountsClone[allele_i]++;
|
||||
ACcountsClone[allele_j]++;
|
||||
|
||||
// to get to this conformation, a sample would need to be BB or BC (remember that ref=0, so add one to the index)
|
||||
final int PLindex = GenotypeLikelihoods.calculatePLindex(allele_i+1, allele_j+1);
|
||||
if ( allele_i == allele_j )
|
||||
sameAlleles.add(new DependentSet(ACcountsClone, ++PLindex));
|
||||
sameAlleles.add(new DependentSet(ACcountsClone, PLindex));
|
||||
else
|
||||
differentAlleles.add(new DependentSet(ACcountsClone, ++PLindex));
|
||||
differentAlleles.add(new DependentSet(ACcountsClone, PLindex));
|
||||
}
|
||||
}
|
||||
|
||||
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
|
||||
for ( DependentSet dependent : differentAlleles )
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset);
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
for ( DependentSet dependent : sameAlleles )
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset);
|
||||
}
|
||||
|
||||
// determine which is the last dependent set in the queue (not necessarily the last one added above) so we can know when it is safe to clean up this column
|
||||
if ( !preserveData ) {
|
||||
final ExactACset lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
|
||||
if ( lastSet != null )
|
||||
lastSet.dependentACsetsToDelete.add(set.ACcounts);
|
||||
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
|
||||
// also adds it as a dependency to the given callingSetIndex.
|
||||
// returns the ExactACset if that set was not already in the queue and null otherwise.
|
||||
private static void updateACset(final int[] ACcounts,
|
||||
// also pushes its value to the given callingSetIndex.
|
||||
private static void updateACset(final int[] newSetCounts,
|
||||
final int numChr,
|
||||
final ExactACset callingSet,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
final ExactACcounts index = new ExactACcounts(ACcounts);
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final ExactACcounts index = new ExactACcounts(newSetCounts);
|
||||
if ( !indexesToACset.containsKey(index) ) {
|
||||
ExactACset set = new ExactACset(numChr/2 +1, index);
|
||||
indexesToACset.put(index, set);
|
||||
ACqueue.add(set);
|
||||
}
|
||||
|
||||
// add the given dependency to the set
|
||||
// push data from the dependency to the new set
|
||||
//if ( DEBUG )
|
||||
// System.out.println(" *** adding dependency from " + index + " to " + callingSet.ACcounts);
|
||||
final ExactACset set = indexesToACset.get(index);
|
||||
set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
|
||||
}
|
||||
|
||||
private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final LinkedList<ExactACset> ACqueue) {
|
||||
Iterator<ExactACset> reverseIterator = ACqueue.descendingIterator();
|
||||
while ( reverseIterator.hasNext() ) {
|
||||
final ExactACset queued = reverseIterator.next();
|
||||
if ( queued.ACsetIndexToPLIndex.containsKey(callingSetIndex) )
|
||||
return queued;
|
||||
}
|
||||
|
||||
// shouldn't get here
|
||||
throw new ReviewedStingException("Error: no sets in the queue currently hold " + callingSetIndex + " as a dependent!");
|
||||
// System.out.println(" *** pushing data from " + index + " to " + dependencySet.ACcounts);
|
||||
pushData(indexesToACset.get(index), dependentSet, PLsetIndex, genotypeLikelihoods);
|
||||
}
|
||||
|
||||
private static void computeLofK(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final double[][] tempLog10ConformationLikelihoods) {
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
|
@ -414,42 +373,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
// k > 0 for at least one k
|
||||
else {
|
||||
// deal with the non-AA possible conformations
|
||||
int conformationIndex = 1;
|
||||
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
|
||||
|
||||
ExactACset dependent = indexesToACset.get(mapping.getKey());
|
||||
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
tempLog10ConformationLikelihoods[j][conformationIndex] =
|
||||
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
|
||||
} else {
|
||||
tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
|
||||
}
|
||||
}
|
||||
|
||||
conformationIndex++;
|
||||
}
|
||||
|
||||
// finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
||||
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
|
||||
// the non-AA possible conformations were dealt with by pushes from dependent sets;
|
||||
// now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
|
||||
if ( totalK < 2*j-1 ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
} else {
|
||||
tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
|
||||
final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue);
|
||||
}
|
||||
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths);
|
||||
set.log10Likelihoods[j] = log10Max - logDenominator;
|
||||
set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -478,6 +413,23 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
}
|
||||
|
||||
private static void pushData(final ExactACset targetSet,
|
||||
final ExactACset dependentSet,
|
||||
final int PLsetIndex,
|
||||
final ArrayList<double[]> genotypeLikelihoods) {
|
||||
final int totalK = targetSet.getACsum();
|
||||
|
||||
for ( int j = 1; j < targetSet.log10Likelihoods.length; j++ ) {
|
||||
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double conformationValue =
|
||||
determineCoefficient(PLsetIndex, j, targetSet.ACcounts.getCounts(), totalK) + dependentSet.log10Likelihoods[j-1] + gl[PLsetIndex];
|
||||
targetSet.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSet.log10Likelihoods[j], conformationValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
|
||||
|
||||
// the closed form representation generalized for multiple alleles is as follows:
|
||||
|
|
@ -488,25 +440,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// BC: 2 * k_b * k_c
|
||||
// CC: k_c * (k_c - 1)
|
||||
|
||||
final int numAltAlleles = ACcounts.length;
|
||||
// find the 2 alleles that are represented by this PL index
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
// *** note that throughout this method we subtract one from the alleleIndex because ACcounts ***
|
||||
// *** doesn't consider the reference allele whereas the GenotypeLikelihoods PL cache does. ***
|
||||
|
||||
// the AX het case
|
||||
if ( PLindex <= numAltAlleles )
|
||||
return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK];
|
||||
if ( alleles.alleleIndex1 == 0 )
|
||||
return MathUtils.log10Cache[2*ACcounts[alleles.alleleIndex2-1]] + MathUtils.log10Cache[2*j-totalK];
|
||||
|
||||
// find the 2 alternate alleles that are represented by this PL index
|
||||
int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numAltAlleles][PLindex];
|
||||
|
||||
final int k_i = ACcounts[alleles[0]-1]; // subtract one because ACcounts doesn't consider the reference allele
|
||||
final int k_i = ACcounts[alleles.alleleIndex1-1];
|
||||
|
||||
// the hom var case (e.g. BB, CC, DD)
|
||||
final double coeff;
|
||||
if ( alleles[0] == alleles[1] ) {
|
||||
if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) {
|
||||
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
|
||||
}
|
||||
// the het non-ref case (e.g. BC, BD, CD)
|
||||
else {
|
||||
final int k_j = ACcounts[alleles[1]-1];
|
||||
final int k_j = ACcounts[alleles.alleleIndex2-1];
|
||||
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -56,10 +56,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
|
||||
// make sure the PL cache has been initialized with enough alleles
|
||||
if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null || UnifiedGenotyperEngine.PLIndexToAlleleIndex.length < 4 ) // +1 for 0 alt alleles
|
||||
UnifiedGenotyperEngine.calculatePLcache(3);
|
||||
}
|
||||
|
||||
public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
|
||||
|
|
@ -136,6 +132,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
}
|
||||
builder.alleles(alleles);
|
||||
|
||||
// create the PL ordering to use based on the allele ordering.
|
||||
final int[] PLordering = new int[numLikelihoods];
|
||||
for ( int i = 0; i <= numAltAlleles; i++ ) {
|
||||
for ( int j = i; j <= numAltAlleles; j++ ) {
|
||||
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
|
||||
// In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc."
|
||||
PLordering[(j * (j+1) / 2) + i] = DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal();
|
||||
}
|
||||
}
|
||||
|
||||
// create the genotypes; no-call everyone for now
|
||||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
final List<Allele> noCall = new ArrayList<Allele>();
|
||||
|
|
@ -145,12 +151,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final double[] allLikelihoods = sampleData.GL.getLikelihoods();
|
||||
final double[] myLikelihoods = new double[numLikelihoods];
|
||||
|
||||
int myLikelihoodsIndex = 0;
|
||||
for ( int i = 0; i <= numAltAlleles; i++ ) {
|
||||
for ( int j = i; j <= numAltAlleles; j++ ) {
|
||||
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
|
||||
}
|
||||
}
|
||||
for ( int i = 0; i < numLikelihoods; i++ )
|
||||
myLikelihoods[i] = allLikelihoods[PLordering[i]];
|
||||
|
||||
// normalize in log space so that max element is zero.
|
||||
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
|
||||
|
|
@ -177,12 +179,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final double[] likelihoods = sampleData.GL.getLikelihoods();
|
||||
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
|
||||
if ( PLindexOfBestGL != PLindexOfRef ) {
|
||||
int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL];
|
||||
if ( alleles[0] != baseIndexOfRef )
|
||||
likelihoodSums[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePairUsingDeprecatedOrdering(PLindexOfBestGL);
|
||||
if ( alleles.alleleIndex1 != baseIndexOfRef )
|
||||
likelihoodSums[alleles.alleleIndex1] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
|
||||
// don't double-count it
|
||||
if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] )
|
||||
likelihoodSums[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
|
||||
if ( alleles.alleleIndex2 != baseIndexOfRef && alleles.alleleIndex2 != alleles.alleleIndex1 )
|
||||
likelihoodSums[alleles.alleleIndex2] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -104,10 +104,6 @@ public class UnifiedGenotyperEngine {
|
|||
private final GenomeLocParser genomeLocParser;
|
||||
private final boolean BAQEnabledOnCMDLine;
|
||||
|
||||
// a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
|
||||
// the representation is int[number of alternate alleles][PL index][pair of allele indexes (where reference = 0)]
|
||||
protected static int[][][] PLIndexToAlleleIndex;
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -140,27 +136,6 @@ public class UnifiedGenotyperEngine {
|
|||
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
|
||||
filter.add(LOW_QUAL_FILTER_NAME);
|
||||
calculatePLcache(UAC.MAX_ALTERNATE_ALLELES);
|
||||
}
|
||||
|
||||
protected static void calculatePLcache(int maxAltAlleles) {
|
||||
PLIndexToAlleleIndex = new int[maxAltAlleles+1][][];
|
||||
PLIndexToAlleleIndex[0] = new int[][]{ new int[]{0, 0} };
|
||||
int numLikelihoods = 1;
|
||||
|
||||
// for each count of alternate alleles
|
||||
for ( int altAlleles = 1; altAlleles <= maxAltAlleles; altAlleles++ ) {
|
||||
numLikelihoods += altAlleles + 1;
|
||||
PLIndexToAlleleIndex[altAlleles] = new int[numLikelihoods][];
|
||||
int PLindex = 0;
|
||||
|
||||
// for all possible combinations of the 2 alt alleles
|
||||
for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) {
|
||||
for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) {
|
||||
PLIndexToAlleleIndex[altAlleles][PLindex++] = new int[]{ allele1, allele2 };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -794,21 +769,17 @@ public class UnifiedGenotyperEngine {
|
|||
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
|
||||
likelihoodIndexesToUse = new ArrayList<Integer>(30);
|
||||
|
||||
// make sure that we've cached enough data
|
||||
if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 )
|
||||
calculatePLcache(numOriginalAltAlleles);
|
||||
final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
|
||||
|
||||
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
||||
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
|
||||
altAlleleIndexToUse[i] = true;
|
||||
}
|
||||
|
||||
for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
|
||||
final int[] alleles = PLcache[PLindex];
|
||||
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(numOriginalAltAlleles);
|
||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
// consider this entry only if both of the alleles are good
|
||||
if ( (alleles[0] == 0 || altAlleleIndexToUse[alleles[0] - 1]) && (alleles[1] == 0 || altAlleleIndexToUse[alleles[1] - 1]) )
|
||||
if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
|
||||
likelihoodIndexesToUse.add(PLindex);
|
||||
}
|
||||
}
|
||||
|
|
@ -861,11 +832,11 @@ public class UnifiedGenotyperEngine {
|
|||
protected static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final int numNewAltAlleles, final Map<String, Object> attrs) {
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
myAlleles.add(allelesToUse.get(alleles[0]));
|
||||
myAlleles.add(allelesToUse.get(alleles[1]));
|
||||
myAlleles.add(allelesToUse.get(alleles.alleleIndex1));
|
||||
myAlleles.add(allelesToUse.get(alleles.alleleIndex2));
|
||||
|
||||
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
|
||||
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
|
||||
|
|
|
|||
|
|
@ -25,13 +25,10 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.jgrapht.util.MathUtil;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
public static final boolean CAP_PLS = false;
|
||||
|
|
@ -201,4 +198,116 @@ public class GenotypeLikelihoods {
|
|||
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Static conversion utilities, going from GL/PL index to allele index and vice versa.
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/*
|
||||
* Class representing the 2 alleles (or rather their indexes into VariantContext.getAllele()) corresponding to a specific PL index.
|
||||
* Note that the reference allele is always index=0.
|
||||
*/
|
||||
public static class GenotypeLikelihoodsAllelePair {
|
||||
public final int alleleIndex1, alleleIndex2;
|
||||
|
||||
public GenotypeLikelihoodsAllelePair(final int alleleIndex1, final int alleleIndex2) {
|
||||
this.alleleIndex1 = alleleIndex1;
|
||||
this.alleleIndex2 = alleleIndex2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
|
||||
*/
|
||||
private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[]{ new GenotypeLikelihoodsAllelePair(0, 0) };
|
||||
|
||||
private static void calculatePLcache(final int minIndex) {
|
||||
// how many alternate alleles do we need to calculate for?
|
||||
int altAlleles = 0;
|
||||
int numLikelihoods = 1;
|
||||
while ( numLikelihoods <= minIndex ) {
|
||||
altAlleles++;
|
||||
numLikelihoods += altAlleles + 1;
|
||||
}
|
||||
|
||||
PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[numLikelihoods];
|
||||
|
||||
// for all possible combinations of 2 alleles
|
||||
for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) {
|
||||
for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) {
|
||||
PLIndexToAlleleIndex[calculatePLindex(allele1, allele2)] = new GenotypeLikelihoodsAllelePair(allele1, allele2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// how many likelihoods are associated with the given number of alternate alleles?
|
||||
public static int calculateNumLikelihoods(int numAltAlleles) {
|
||||
int numLikelihoods = 1;
|
||||
for ( int i = 1; i <= numAltAlleles; i++ )
|
||||
numLikelihoods += i + 1;
|
||||
return numLikelihoods;
|
||||
}
|
||||
|
||||
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
|
||||
// In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc."
|
||||
// Assumes that allele1Index < allele2Index
|
||||
public static int calculatePLindex(final int allele1Index, final int allele2Index) {
|
||||
return (allele2Index * (allele2Index+1) / 2) + allele1Index;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the allele index pair for the given PL
|
||||
*
|
||||
* @param PLindex the PL index
|
||||
* @return the allele index pair
|
||||
*/
|
||||
public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) {
|
||||
// make sure that we've cached enough data
|
||||
if ( PLindex >= PLIndexToAlleleIndex.length )
|
||||
calculatePLcache(PLindex);
|
||||
|
||||
return PLIndexToAlleleIndex[PLindex];
|
||||
}
|
||||
|
||||
// An index conversion from the deprecated PL ordering to the new VCF-based ordering for up to 3 alternate alleles
|
||||
protected static int[] PLindexConversion = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9};
|
||||
|
||||
/**
|
||||
* get the allele index pair for the given PL using the deprecated PL ordering:
|
||||
* AA,AB,AC,AD,BB,BC,BD,CC,CD,DD instead of AA,AB,BB,AC,BC,CC,AD,BD,CD,DD.
|
||||
* Although it's painful to keep this conversion around, our DiploidSNPGenotypeLikelihoods class uses the deprecated
|
||||
* ordering and I know with certainty that external users have built code on top of it; changing it now would
|
||||
* cause a whole lot of heartache for our collaborators, so for now at least there's a standard conversion method.
|
||||
* This method assumes at most 3 alternate alleles.
|
||||
* TODO -- address this issue at the source by updating DiploidSNPGenotypeLikelihoods.
|
||||
*
|
||||
* @param PLindex the PL index
|
||||
* @return the allele index pair
|
||||
*/
|
||||
public static GenotypeLikelihoodsAllelePair getAllelePairUsingDeprecatedOrdering(final int PLindex) {
|
||||
// make sure that we've cached enough data
|
||||
if ( PLindex >= PLIndexToAlleleIndex.length )
|
||||
calculatePLcache(PLindex);
|
||||
|
||||
return PLIndexToAlleleIndex[PLindexConversion[PLindex]];
|
||||
}
|
||||
|
||||
/**
|
||||
* get the PL indexes (AA, AB, BB) for the given allele pair; assumes allele1Index <= allele2Index.
|
||||
*
|
||||
* @param allele1Index the index in VariantContext.getAllele() of the first allele
|
||||
* @param allele2Index the index in VariantContext.getAllele() of the second allele
|
||||
* @return the PL indexes
|
||||
*/
|
||||
public static int[] getPLIndecesOfAlleles(final int allele1Index, final int allele2Index) {
|
||||
|
||||
final int[] indexes = new int[3];
|
||||
indexes[0] = calculatePLindex(allele1Index, allele1Index);
|
||||
indexes[1] = calculatePLindex(allele1Index, allele2Index);
|
||||
indexes[2] = calculatePLindex(allele2Index, allele2Index);
|
||||
return indexes;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1255,40 +1255,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
return best;
|
||||
}
|
||||
|
||||
public int[] getGLIndecesOfAllele(Allele inputAllele) {
|
||||
public int[] getGLIndecesOfAlternateAllele(Allele targetAllele) {
|
||||
|
||||
// TODO -- this information is cached statically by the UnifiedGenotyperEngine; pull it out into a common utils class for all to use
|
||||
|
||||
int[] idxVector = new int[3];
|
||||
int numAlleles = this.getAlleles().size();
|
||||
|
||||
int idxDiag = numAlleles;
|
||||
int incr = numAlleles - 1;
|
||||
int k=1;
|
||||
for (Allele a: getAlternateAlleles()) {
|
||||
// multi-allelic approximation, part 1: Ideally
|
||||
// for each alt allele compute marginal (suboptimal) posteriors -
|
||||
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
|
||||
// as a row-wise upper triangular matrix of likelihoods.
|
||||
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
||||
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
||||
|
||||
int idxAA = 0;
|
||||
int idxAB = k++;
|
||||
// yy is always element on the diagonal.
|
||||
// 2 alleles: BBelement 2
|
||||
// 3 alleles: BB element 3. CC element 5
|
||||
// 4 alleles:
|
||||
int idxBB = idxDiag;
|
||||
|
||||
if (a.equals(inputAllele)) {
|
||||
idxVector[0] = idxAA;
|
||||
idxVector[1] = idxAB;
|
||||
idxVector[2] = idxBB;
|
||||
int index = 1;
|
||||
for ( Allele allele : getAlternateAlleles() ) {
|
||||
if ( allele.equals(targetAllele) )
|
||||
break;
|
||||
}
|
||||
idxDiag += incr--;
|
||||
index++;
|
||||
}
|
||||
return idxVector;
|
||||
|
||||
return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
BB1 = new double[]{-20.0, -20.0, 0.0};
|
||||
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
|
||||
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
|
||||
AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
|
||||
BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
|
||||
AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
|
||||
BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
|
||||
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
|
||||
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,10 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// ********************************************************************************** //
|
||||
|
|
@ -60,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("5af005255240a2186f04cb50851b8b6f"));
|
||||
Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -277,42 +279,53 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("9cd08dc412a007933381e9c76c073899"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn3() {
|
||||
public void testMultiSampleIndels() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("52340d578a708fa709b69ce48987bc9d"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("2609675a356f2dfc86f8a1d911210978"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("9566c7abef5ee5829a516d90445b347f"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn4() {
|
||||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("4fdd8da77167881b71b3547da5c13f94"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
public void testGGAwithNoEvidenceInReads() {
|
||||
final String vcf = "small.indel.test.vcf";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("7d069596597aee5e0d562964036141eb"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing SnpEff
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void testWithIndelAllelesPassedIn5() {
|
||||
final String vcf = "small.indel.test.vcf";
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testgetQualFromLikelihoods(){
|
||||
public void testgetQualFromLikelihoods() {
|
||||
double[] likelihoods = new double[]{-1, 0, -2};
|
||||
// qual values we expect for each possible "best" genotype
|
||||
double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294};
|
||||
|
|
@ -134,4 +134,33 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
Assert.assertEquals(v1[i], v2[i], 1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCalculatePLindex(){
|
||||
int counter = 0;
|
||||
for ( int i = 0; i <= 3; i++ ) {
|
||||
for ( int j = i; j <= 3; j++ ) {
|
||||
Assert.assertEquals(GenotypeLikelihoods.calculatePLindex(i, j), GenotypeLikelihoods.PLindexConversion[counter++], "PL index of alleles " + i + "," + j + " was not calculated correctly");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAllelePair(){
|
||||
allelePairTest(0, 0, 0);
|
||||
allelePairTest(1, 0, 1);
|
||||
allelePairTest(2, 1, 1);
|
||||
allelePairTest(3, 0, 2);
|
||||
allelePairTest(4, 1, 2);
|
||||
allelePairTest(5, 2, 2);
|
||||
allelePairTest(6, 0, 3);
|
||||
allelePairTest(7, 1, 3);
|
||||
allelePairTest(8, 2, 3);
|
||||
allelePairTest(9, 3, 3);
|
||||
}
|
||||
|
||||
private void allelePairTest(int PLindex, int allele1, int allele2) {
|
||||
Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex1, allele1, "allele index " + allele1 + " from PL index " + PLindex + " was not calculated correctly");
|
||||
Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex2, allele2, "allele index " + allele2 + " from PL index " + PLindex + " was not calculated correctly");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue