Merged bug fix from Stable into Unstable

This commit is contained in:
Eric Banks 2012-03-16 14:33:53 -04:00
commit be9e48ba29
10 changed files with 288 additions and 237 deletions

View File

@ -42,7 +42,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno
if (!vc.isBiallelic()) { if (!vc.isBiallelic()) {
// for non-bliallelic case, do test with most common alt allele. // for non-bliallelic case, do test with most common alt allele.
// Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB. // Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB.
int[] idxVector = vc.getGLIndecesOfAllele(vc.getAltAlleleWithHighestAlleleCount()); int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount());
idxAA = idxVector[0]; idxAA = idxVector[0];
idxAB = idxVector[1]; idxAB = idxVector[1];
idxBB = idxVector[2]; idxBB = idxVector[2];

View File

@ -64,7 +64,6 @@ public enum DiploidGenotype {
return r != base2; return r != base2;
else else
return base2 == r; return base2 == r;
//return MathUtils.countOccurrences(r, this.toString()) == 1;
} }
public boolean isHom() { public boolean isHom() {

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream; import java.io.PrintStream;
@ -61,7 +60,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result, false); linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
return alleles; return alleles;
} }
@ -85,21 +84,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( int i = 0; i < numOriginalAltAlleles; i++ ) for ( int i = 0; i < numOriginalAltAlleles; i++ )
likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i)); likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i));
// make sure that we've cached enough data
if ( numOriginalAltAlleles > UnifiedGenotyperEngine.PLIndexToAlleleIndex.length - 1 )
UnifiedGenotyperEngine.calculatePLcache(numOriginalAltAlleles);
// based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype // based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype
final ArrayList<double[]> GLs = getGLs(vc.getGenotypes()); final ArrayList<double[]> GLs = getGLs(vc.getGenotypes());
for ( final double[] likelihoods : GLs ) { for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) { if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) {
int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numOriginalAltAlleles][PLindexOfBestGL]; GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL);
if ( alleles[0] != 0 ) if ( alleles.alleleIndex1 != 0 )
likelihoodSums[alleles[0]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
// don't double-count it // don't double-count it
if ( alleles[1] != 0 && alleles[1] != alleles[0] ) if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 )
likelihoodSums[alleles[1]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF];
} }
} }
@ -189,24 +184,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// the column of the matrix // the column of the matrix
final double[] log10Likelihoods; final double[] log10Likelihoods;
// mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column; int sum = -1;
// for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB.
final HashMap<ExactACcounts, Integer> ACsetIndexToPLIndex = new HashMap<ExactACcounts, Integer>();
// to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them
final ArrayList<ExactACcounts> dependentACsetsToDelete = new ArrayList<ExactACcounts>();
public ExactACset(final int size, final ExactACcounts ACcounts) { public ExactACset(final int size, final ExactACcounts ACcounts) {
this.ACcounts = ACcounts; this.ACcounts = ACcounts;
log10Likelihoods = new double[size]; log10Likelihoods = new double[size];
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
} }
// sum of all the non-reference alleles // sum of all the non-reference alleles
public int getACsum() { public int getACsum() {
int sum = 0; if ( sum == -1 ) {
for ( int count : ACcounts.getCounts() ) sum = 0;
sum += count; for ( int count : ACcounts.getCounts() )
sum += count;
}
return sum; return sum;
} }
@ -215,15 +207,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
// TODO -- remove me
public static void linearExactMultiAllelic(final GenotypesContext GLs, public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles, final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result, final AlleleFrequencyCalculationResult result,
final boolean preserveData) { final boolean foo) {
linearExactMultiAllelic(GLs, numAlternateAlleles, log10AlleleFrequencyPriors, result);
}
// make sure the PL cache has been initialized
if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null )
UnifiedGenotyperEngine.calculatePLcache(5); public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles,
final double[][] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) {
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs); final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1; final int numSamples = genotypeLikelihoods.size()-1;
@ -241,21 +239,20 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
ACqueue.add(zeroSet); ACqueue.add(zeroSet);
indexesToACset.put(zeroSet.ACcounts, zeroSet); indexesToACset.put(zeroSet.ACcounts, zeroSet);
// optimization: create the temporary storage for computing L(j,k) just once
final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
for ( int i = 0; i < maxPossibleDependencies; i++ )
tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
// keep processing while we have AC conformations that need to be calculated // keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY; double maxLog10L = Double.NEGATIVE_INFINITY;
while ( !ACqueue.isEmpty() ) { while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset set = ACqueue.remove(); final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods); final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
// adjust max likelihood seen if needed // adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs); maxLog10L = Math.max(maxLog10L, log10LofKs);
// clean up memory
indexesToACset.remove(set.ACcounts);
//if ( DEBUG )
// System.out.printf(" *** removing used set=%s%n", set.ACcounts);
} }
} }
@ -273,27 +270,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final double maxLog10L, final double maxLog10L,
final int numChr, final int numChr,
final boolean preserveData,
final LinkedList<ExactACset> ACqueue, final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset, final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result, final AlleleFrequencyCalculationResult result) {
final double[][] tempLog10ConformationLikelihoods) {
//if ( DEBUG ) //if ( DEBUG )
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods // compute the log10Likelihoods
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods); computeLofK(set, genotypeLikelihoods, log10AlleleFrequencyPriors, result);
// clean up memory
if ( !preserveData ) {
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
indexesToACset.remove(index);
//if ( DEBUG )
// System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
}
}
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
@ -301,11 +287,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
//if ( DEBUG ) //if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
// no reason to keep this data around because nothing depends on it
if ( !preserveData )
indexesToACset.remove(set.ACcounts);
return log10LofK; return log10LofK;
} }
@ -316,15 +297,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final int numAltAlleles = set.ACcounts.getCounts().length; final int numAltAlleles = set.ACcounts.getCounts().length;
// genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
// so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
// add conformations for the k+1 case // add conformations for the k+1 case
int PLindex = 0;
for ( int allele = 0; allele < numAltAlleles; allele++ ) { for ( int allele = 0; allele < numAltAlleles; allele++ ) {
final int[] ACcountsClone = set.ACcounts.getCounts().clone(); final int[] ACcountsClone = set.ACcounts.getCounts().clone();
ACcountsClone[allele]++; ACcountsClone[allele]++;
updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset); // to get to this conformation, a sample would need to be AB (remember that ref=0)
final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1);
updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
} }
// add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different
@ -338,71 +317,51 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
ACcountsClone[allele_i]++; ACcountsClone[allele_i]++;
ACcountsClone[allele_j]++; ACcountsClone[allele_j]++;
// to get to this conformation, a sample would need to be BB or BC (remember that ref=0, so add one to the index)
final int PLindex = GenotypeLikelihoods.calculatePLindex(allele_i+1, allele_j+1);
if ( allele_i == allele_j ) if ( allele_i == allele_j )
sameAlleles.add(new DependentSet(ACcountsClone, ++PLindex)); sameAlleles.add(new DependentSet(ACcountsClone, PLindex));
else else
differentAlleles.add(new DependentSet(ACcountsClone, ++PLindex)); differentAlleles.add(new DependentSet(ACcountsClone, PLindex));
} }
} }
// IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering // IMPORTANT: we must first add the cases where the 2 new alleles are different so that the queue maintains its ordering
for ( DependentSet dependent : differentAlleles ) for ( DependentSet dependent : differentAlleles )
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset); updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
for ( DependentSet dependent : sameAlleles ) for ( DependentSet dependent : sameAlleles )
updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset); updateACset(dependent.ACcounts, numChr, set, dependent.PLindex, ACqueue, indexesToACset, genotypeLikelihoods);
}
// determine which is the last dependent set in the queue (not necessarily the last one added above) so we can know when it is safe to clean up this column
if ( !preserveData ) {
final ExactACset lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
if ( lastSet != null )
lastSet.dependentACsetsToDelete.add(set.ACcounts);
} }
return log10LofK; return log10LofK;
} }
// adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and
// also adds it as a dependency to the given callingSetIndex. // also pushes its value to the given callingSetIndex.
// returns the ExactACset if that set was not already in the queue and null otherwise. private static void updateACset(final int[] newSetCounts,
private static void updateACset(final int[] ACcounts,
final int numChr, final int numChr,
final ExactACset callingSet, final ExactACset dependentSet,
final int PLsetIndex, final int PLsetIndex,
final Queue<ExactACset> ACqueue, final Queue<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) { final HashMap<ExactACcounts, ExactACset> indexesToACset,
final ExactACcounts index = new ExactACcounts(ACcounts); final ArrayList<double[]> genotypeLikelihoods) {
final ExactACcounts index = new ExactACcounts(newSetCounts);
if ( !indexesToACset.containsKey(index) ) { if ( !indexesToACset.containsKey(index) ) {
ExactACset set = new ExactACset(numChr/2 +1, index); ExactACset set = new ExactACset(numChr/2 +1, index);
indexesToACset.put(index, set); indexesToACset.put(index, set);
ACqueue.add(set); ACqueue.add(set);
} }
// add the given dependency to the set // push data from the dependency to the new set
//if ( DEBUG ) //if ( DEBUG )
// System.out.println(" *** adding dependency from " + index + " to " + callingSet.ACcounts); // System.out.println(" *** pushing data from " + index + " to " + dependencySet.ACcounts);
final ExactACset set = indexesToACset.get(index); pushData(indexesToACset.get(index), dependentSet, PLsetIndex, genotypeLikelihoods);
set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex);
}
private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final LinkedList<ExactACset> ACqueue) {
Iterator<ExactACset> reverseIterator = ACqueue.descendingIterator();
while ( reverseIterator.hasNext() ) {
final ExactACset queued = reverseIterator.next();
if ( queued.ACsetIndexToPLIndex.containsKey(callingSetIndex) )
return queued;
}
// shouldn't get here
throw new ReviewedStingException("Error: no sets in the queue currently hold " + callingSetIndex + " as a dependent!");
} }
private static void computeLofK(final ExactACset set, private static void computeLofK(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result, final AlleleFrequencyCalculationResult result) {
final double[][] tempLog10ConformationLikelihoods) {
set.log10Likelihoods[0] = 0.0; // the zero case set.log10Likelihoods[0] = 0.0; // the zero case
final int totalK = set.getACsum(); final int totalK = set.getACsum();
@ -414,42 +373,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
// k > 0 for at least one k // k > 0 for at least one k
else { else {
// deal with the non-AA possible conformations // the non-AA possible conformations were dealt with by pushes from dependent sets;
int conformationIndex = 1; // now deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
//if ( DEBUG )
// System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
ExactACset dependent = indexesToACset.get(mapping.getKey());
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
if ( totalK <= 2*j ) { // skip impossible conformations
final double[] gl = genotypeLikelihoods.get(j);
tempLog10ConformationLikelihoods[j][conformationIndex] =
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
} else {
tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
}
}
conformationIndex++;
}
// finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
if ( totalK < 2*j-1 ) { if ( totalK < 2*j-1 ) {
final double[] gl = genotypeLikelihoods.get(j); final double[] gl = genotypeLikelihoods.get(j);
tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; final double conformationValue = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
} else { set.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(set.log10Likelihoods[j], conformationValue);
tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
} }
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths); set.log10Likelihoods[j] = set.log10Likelihoods[j] - logDenominator;
set.log10Likelihoods[j] = log10Max - logDenominator;
} }
} }
@ -478,6 +413,23 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
private static void pushData(final ExactACset targetSet,
final ExactACset dependentSet,
final int PLsetIndex,
final ArrayList<double[]> genotypeLikelihoods) {
final int totalK = targetSet.getACsum();
for ( int j = 1; j < targetSet.log10Likelihoods.length; j++ ) {
if ( totalK <= 2*j ) { // skip impossible conformations
final double[] gl = genotypeLikelihoods.get(j);
final double conformationValue =
determineCoefficient(PLsetIndex, j, targetSet.ACcounts.getCounts(), totalK) + dependentSet.log10Likelihoods[j-1] + gl[PLsetIndex];
targetSet.log10Likelihoods[j] = MathUtils.approximateLog10SumLog10(targetSet.log10Likelihoods[j], conformationValue);
}
}
}
private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) { private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
// the closed form representation generalized for multiple alleles is as follows: // the closed form representation generalized for multiple alleles is as follows:
@ -488,25 +440,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// BC: 2 * k_b * k_c // BC: 2 * k_b * k_c
// CC: k_c * (k_c - 1) // CC: k_c * (k_c - 1)
final int numAltAlleles = ACcounts.length; // find the 2 alleles that are represented by this PL index
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// *** note that throughout this method we subtract one from the alleleIndex because ACcounts ***
// *** doesn't consider the reference allele whereas the GenotypeLikelihoods PL cache does. ***
// the AX het case // the AX het case
if ( PLindex <= numAltAlleles ) if ( alleles.alleleIndex1 == 0 )
return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK]; return MathUtils.log10Cache[2*ACcounts[alleles.alleleIndex2-1]] + MathUtils.log10Cache[2*j-totalK];
// find the 2 alternate alleles that are represented by this PL index final int k_i = ACcounts[alleles.alleleIndex1-1];
int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numAltAlleles][PLindex];
final int k_i = ACcounts[alleles[0]-1]; // subtract one because ACcounts doesn't consider the reference allele
// the hom var case (e.g. BB, CC, DD) // the hom var case (e.g. BB, CC, DD)
final double coeff; final double coeff;
if ( alleles[0] == alleles[1] ) { if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) {
coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1]; coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
} }
// the het non-ref case (e.g. BC, BD, CD) // the het non-ref case (e.g. BC, BD, CD)
else { else {
final int k_j = ACcounts[alleles[1]-1]; final int k_j = ACcounts[alleles.alleleIndex2-1];
coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j]; coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
} }

View File

@ -56,10 +56,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger); super(UAC, logger);
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
// make sure the PL cache has been initialized with enough alleles
if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null || UnifiedGenotyperEngine.PLIndexToAlleleIndex.length < 4 ) // +1 for 0 alt alleles
UnifiedGenotyperEngine.calculatePLcache(3);
} }
public VariantContext getLikelihoods(final RefMetaDataTracker tracker, public VariantContext getLikelihoods(final RefMetaDataTracker tracker,
@ -136,6 +132,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
} }
builder.alleles(alleles); builder.alleles(alleles);
// create the PL ordering to use based on the allele ordering.
final int[] PLordering = new int[numLikelihoods];
for ( int i = 0; i <= numAltAlleles; i++ ) {
for ( int j = i; j <= numAltAlleles; j++ ) {
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
// In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc."
PLordering[(j * (j+1) / 2) + i] = DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal();
}
}
// create the genotypes; no-call everyone for now // create the genotypes; no-call everyone for now
final GenotypesContext genotypes = GenotypesContext.create(); final GenotypesContext genotypes = GenotypesContext.create();
final List<Allele> noCall = new ArrayList<Allele>(); final List<Allele> noCall = new ArrayList<Allele>();
@ -145,12 +151,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final double[] allLikelihoods = sampleData.GL.getLikelihoods(); final double[] allLikelihoods = sampleData.GL.getLikelihoods();
final double[] myLikelihoods = new double[numLikelihoods]; final double[] myLikelihoods = new double[numLikelihoods];
int myLikelihoodsIndex = 0; for ( int i = 0; i < numLikelihoods; i++ )
for ( int i = 0; i <= numAltAlleles; i++ ) { myLikelihoods[i] = allLikelihoods[PLordering[i]];
for ( int j = i; j <= numAltAlleles; j++ ) {
myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()];
}
}
// normalize in log space so that max element is zero. // normalize in log space so that max element is zero.
final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true));
@ -177,12 +179,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final double[] likelihoods = sampleData.GL.getLikelihoods(); final double[] likelihoods = sampleData.GL.getLikelihoods();
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
if ( PLindexOfBestGL != PLindexOfRef ) { if ( PLindexOfBestGL != PLindexOfRef ) {
int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL]; GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePairUsingDeprecatedOrdering(PLindexOfBestGL);
if ( alleles[0] != baseIndexOfRef ) if ( alleles.alleleIndex1 != baseIndexOfRef )
likelihoodSums[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; likelihoodSums[alleles.alleleIndex1] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
// don't double-count it // don't double-count it
if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] ) if ( alleles.alleleIndex2 != baseIndexOfRef && alleles.alleleIndex2 != alleles.alleleIndex1 )
likelihoodSums[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; likelihoodSums[alleles.alleleIndex2] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef];
} }
} }

View File

@ -104,10 +104,6 @@ public class UnifiedGenotyperEngine {
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine; private final boolean BAQEnabledOnCMDLine;
// a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
// the representation is int[number of alternate alleles][PL index][pair of allele indexes (where reference = 0)]
protected static int[][][] PLIndexToAlleleIndex;
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
@ -140,27 +136,6 @@ public class UnifiedGenotyperEngine {
genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL); genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
filter.add(LOW_QUAL_FILTER_NAME); filter.add(LOW_QUAL_FILTER_NAME);
calculatePLcache(UAC.MAX_ALTERNATE_ALLELES);
}
protected static void calculatePLcache(int maxAltAlleles) {
PLIndexToAlleleIndex = new int[maxAltAlleles+1][][];
PLIndexToAlleleIndex[0] = new int[][]{ new int[]{0, 0} };
int numLikelihoods = 1;
// for each count of alternate alleles
for ( int altAlleles = 1; altAlleles <= maxAltAlleles; altAlleles++ ) {
numLikelihoods += altAlleles + 1;
PLIndexToAlleleIndex[altAlleles] = new int[numLikelihoods][];
int PLindex = 0;
// for all possible combinations of the 2 alt alleles
for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) {
for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) {
PLIndexToAlleleIndex[altAlleles][PLindex++] = new int[]{ allele1, allele2 };
}
}
}
} }
/** /**
@ -794,21 +769,17 @@ public class UnifiedGenotyperEngine {
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
likelihoodIndexesToUse = new ArrayList<Integer>(30); likelihoodIndexesToUse = new ArrayList<Integer>(30);
// make sure that we've cached enough data
if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 )
calculatePLcache(numOriginalAltAlleles);
final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ ) { for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
altAlleleIndexToUse[i] = true; altAlleleIndexToUse[i] = true;
} }
for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) { final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(numOriginalAltAlleles);
final int[] alleles = PLcache[PLindex]; for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good // consider this entry only if both of the alleles are good
if ( (alleles[0] == 0 || altAlleleIndexToUse[alleles[0] - 1]) && (alleles[1] == 0 || altAlleleIndexToUse[alleles[1] - 1]) ) if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
likelihoodIndexesToUse.add(PLindex); likelihoodIndexesToUse.add(PLindex);
} }
} }
@ -861,11 +832,11 @@ public class UnifiedGenotyperEngine {
protected static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final int numNewAltAlleles, final Map<String, Object> attrs) { protected static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final int numNewAltAlleles, final Map<String, Object> attrs) {
// find the genotype with maximum likelihoods // find the genotype with maximum likelihoods
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex]; GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
ArrayList<Allele> myAlleles = new ArrayList<Allele>(); ArrayList<Allele> myAlleles = new ArrayList<Allele>();
myAlleles.add(allelesToUse.get(alleles[0])); myAlleles.add(allelesToUse.get(alleles.alleleIndex1));
myAlleles.add(allelesToUse.get(alleles[1])); myAlleles.add(allelesToUse.get(alleles.alleleIndex2));
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods); final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false); return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);

View File

@ -25,13 +25,10 @@
package org.broadinstitute.sting.utils.variantcontext; package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException; import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.jgrapht.util.MathUtil;
import java.util.EnumMap; import java.util.EnumMap;
import java.util.Map;
public class GenotypeLikelihoods { public class GenotypeLikelihoods {
public static final boolean CAP_PLS = false; public static final boolean CAP_PLS = false;
@ -201,4 +198,116 @@ public class GenotypeLikelihoods {
return s.toString(); return s.toString();
} }
// -------------------------------------------------------------------------------------
//
// Static conversion utilities, going from GL/PL index to allele index and vice versa.
//
// -------------------------------------------------------------------------------------
/*
* Class representing the 2 alleles (or rather their indexes into VariantContext.getAllele()) corresponding to a specific PL index.
* Note that the reference allele is always index=0.
*/
public static class GenotypeLikelihoodsAllelePair {
public final int alleleIndex1, alleleIndex2;
public GenotypeLikelihoodsAllelePair(final int alleleIndex1, final int alleleIndex2) {
this.alleleIndex1 = alleleIndex1;
this.alleleIndex2 = alleleIndex2;
}
}
/*
* a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
*/
private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[]{ new GenotypeLikelihoodsAllelePair(0, 0) };
private static void calculatePLcache(final int minIndex) {
// how many alternate alleles do we need to calculate for?
int altAlleles = 0;
int numLikelihoods = 1;
while ( numLikelihoods <= minIndex ) {
altAlleles++;
numLikelihoods += altAlleles + 1;
}
PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[numLikelihoods];
// for all possible combinations of 2 alleles
for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) {
for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) {
PLIndexToAlleleIndex[calculatePLindex(allele1, allele2)] = new GenotypeLikelihoodsAllelePair(allele1, allele2);
}
}
}
// how many likelihoods are associated with the given number of alternate alleles?
public static int calculateNumLikelihoods(int numAltAlleles) {
int numLikelihoods = 1;
for ( int i = 1; i <= numAltAlleles; i++ )
numLikelihoods += i + 1;
return numLikelihoods;
}
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
// In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc."
// Assumes that allele1Index < allele2Index
public static int calculatePLindex(final int allele1Index, final int allele2Index) {
return (allele2Index * (allele2Index+1) / 2) + allele1Index;
}
/**
* get the allele index pair for the given PL
*
* @param PLindex the PL index
* @return the allele index pair
*/
public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) {
// make sure that we've cached enough data
if ( PLindex >= PLIndexToAlleleIndex.length )
calculatePLcache(PLindex);
return PLIndexToAlleleIndex[PLindex];
}
// An index conversion from the deprecated PL ordering to the new VCF-based ordering for up to 3 alternate alleles
protected static int[] PLindexConversion = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9};
/**
* get the allele index pair for the given PL using the deprecated PL ordering:
* AA,AB,AC,AD,BB,BC,BD,CC,CD,DD instead of AA,AB,BB,AC,BC,CC,AD,BD,CD,DD.
* Although it's painful to keep this conversion around, our DiploidSNPGenotypeLikelihoods class uses the deprecated
* ordering and I know with certainty that external users have built code on top of it; changing it now would
* cause a whole lot of heartache for our collaborators, so for now at least there's a standard conversion method.
* This method assumes at most 3 alternate alleles.
* TODO -- address this issue at the source by updating DiploidSNPGenotypeLikelihoods.
*
* @param PLindex the PL index
* @return the allele index pair
*/
public static GenotypeLikelihoodsAllelePair getAllelePairUsingDeprecatedOrdering(final int PLindex) {
// make sure that we've cached enough data
if ( PLindex >= PLIndexToAlleleIndex.length )
calculatePLcache(PLindex);
return PLIndexToAlleleIndex[PLindexConversion[PLindex]];
}
/**
* get the PL indexes (AA, AB, BB) for the given allele pair; assumes allele1Index <= allele2Index.
*
* @param allele1Index the index in VariantContext.getAllele() of the first allele
* @param allele2Index the index in VariantContext.getAllele() of the second allele
* @return the PL indexes
*/
public static int[] getPLIndecesOfAlleles(final int allele1Index, final int allele2Index) {
final int[] indexes = new int[3];
indexes[0] = calculatePLindex(allele1Index, allele1Index);
indexes[1] = calculatePLindex(allele1Index, allele2Index);
indexes[2] = calculatePLindex(allele2Index, allele2Index);
return indexes;
}
} }

View File

@ -1255,40 +1255,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return best; return best;
} }
public int[] getGLIndecesOfAllele(Allele inputAllele) { public int[] getGLIndecesOfAlternateAllele(Allele targetAllele) {
// TODO -- this information is cached statically by the UnifiedGenotyperEngine; pull it out into a common utils class for all to use int index = 1;
for ( Allele allele : getAlternateAlleles() ) {
int[] idxVector = new int[3]; if ( allele.equals(targetAllele) )
int numAlleles = this.getAlleles().size();
int idxDiag = numAlleles;
int incr = numAlleles - 1;
int k=1;
for (Allele a: getAlternateAlleles()) {
// multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors -
// compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of
// as a row-wise upper triangular matrix of likelihoods.
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
int idxAA = 0;
int idxAB = k++;
// yy is always element on the diagonal.
// 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5
// 4 alleles:
int idxBB = idxDiag;
if (a.equals(inputAllele)) {
idxVector[0] = idxAA;
idxVector[1] = idxAB;
idxVector[2] = idxBB;
break; break;
} index++;
idxDiag += incr--;
} }
return idxVector;
return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index);
} }
} }

View File

@ -27,8 +27,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
BB1 = new double[]{-20.0, -20.0, 0.0}; BB1 = new double[]{-20.0, -20.0, 0.0};
AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0}; AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0};
AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0};
AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0};
BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0};
BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0};
CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0};
} }

View File

@ -5,8 +5,10 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
// ********************************************************************************** // // ********************************************************************************** //
@ -60,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() { public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1, "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
Arrays.asList("5af005255240a2186f04cb50851b8b6f")); Arrays.asList("0de4aeed6a52f08ed86a7642c812478b"));
executeTest("test Multiple SNP alleles", spec); executeTest("test Multiple SNP alleles", spec);
} }
@ -277,42 +279,53 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testWithIndelAllelesPassedIn1() { public void testWithIndelAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("9cd08dc412a007933381e9c76c073899")); Arrays.asList("9cd08dc412a007933381e9c76c073899"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
} }
@Test @Test
public void testWithIndelAllelesPassedIn2() { public void testWithIndelAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53")); Arrays.asList("5ef1f007d3ef77c1b8f31e5e036eff53"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
} }
@Test @Test
public void testWithIndelAllelesPassedIn3() { public void testMultiSampleIndels() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("52340d578a708fa709b69ce48987bc9d"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("2609675a356f2dfc86f8a1d911210978")); Arrays.asList("9566c7abef5ee5829a516d90445b347f"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
} }
@Test @Test
public void testWithIndelAllelesPassedIn4() { public void testGGAwithNoEvidenceInReads() {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( final String vcf = "small.indel.test.vcf";
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + vcf + " -I " + validationDataLocation +
Arrays.asList("4fdd8da77167881b71b3547da5c13f94")); "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
executeTest("test MultiSample Phase1 indels with complicated records", spec4); Arrays.asList("7d069596597aee5e0d562964036141eb"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
} }
// --------------------------------------------------------------------------------------------------------------
//
// testing SnpEff
//
// --------------------------------------------------------------------------------------------------------------
@Test @Test
public void testWithIndelAllelesPassedIn5() { public void testWithIndelAllelesPassedIn5() {
final String vcf = "small.indel.test.vcf"; final String vcf = "small.indel.test.vcf";

View File

@ -117,7 +117,7 @@ public class GenotypeLikelihoodsUnitTest {
} }
@Test @Test
public void testgetQualFromLikelihoods(){ public void testgetQualFromLikelihoods() {
double[] likelihoods = new double[]{-1, 0, -2}; double[] likelihoods = new double[]{-1, 0, -2};
// qual values we expect for each possible "best" genotype // qual values we expect for each possible "best" genotype
double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294}; double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294};
@ -134,4 +134,33 @@ public class GenotypeLikelihoodsUnitTest {
Assert.assertEquals(v1[i], v2[i], 1e-6); Assert.assertEquals(v1[i], v2[i], 1e-6);
} }
} }
@Test
public void testCalculatePLindex(){
int counter = 0;
for ( int i = 0; i <= 3; i++ ) {
for ( int j = i; j <= 3; j++ ) {
Assert.assertEquals(GenotypeLikelihoods.calculatePLindex(i, j), GenotypeLikelihoods.PLindexConversion[counter++], "PL index of alleles " + i + "," + j + " was not calculated correctly");
}
}
}
@Test
public void testGetAllelePair(){
allelePairTest(0, 0, 0);
allelePairTest(1, 0, 1);
allelePairTest(2, 1, 1);
allelePairTest(3, 0, 2);
allelePairTest(4, 1, 2);
allelePairTest(5, 2, 2);
allelePairTest(6, 0, 3);
allelePairTest(7, 1, 3);
allelePairTest(8, 2, 3);
allelePairTest(9, 3, 3);
}
private void allelePairTest(int PLindex, int allele1, int allele2) {
Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex1, allele1, "allele index " + allele1 + " from PL index " + PLindex + " was not calculated correctly");
Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex2, allele2, "allele index " + allele2 + " from PL index " + PLindex + " was not calculated correctly");
}
} }