Greedy version of function to compute the max achievable AC for each alt allele
-- walks over the genotypes in VC, and computes for each alt allele the maximum AC we need to consider in that alt allele dimension. Does the calculation based on the PLs in each genotype g, choosing to update the max AC for the alt alleles corresponding to that PL. Only takes the first lowest PL, if there are multiple genotype configurations with the same PL value. It takes values in the order of the alt alleles.
This commit is contained in:
parent
7666a58773
commit
efad215edb
|
|
@ -378,14 +378,70 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
|
||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
||||
|
||||
// this is necessary because cannot ensure that the tester gives us back the requested ACs due
|
||||
// to rounding errors
|
||||
testExpectedACs(vc, maxACsToVisit);
|
||||
}
|
||||
|
||||
private void testExpectedACs(final VariantContext vc, final int[] maxACsToVisit) {
|
||||
// this is necessary because cannot ensure that the tester gives us back the
|
||||
// requested ACs due to rounding errors
|
||||
final List<Integer> ACs = new ArrayList<Integer>();
|
||||
for ( final Allele a : vc.getAlternateAlleles() )
|
||||
ACs.add(vc.getCalledChrCount(a));
|
||||
|
||||
for ( int i = 0; i < nAlts; i++ ) {
|
||||
for ( int i = 0; i < maxACsToVisit.length; i++ ) {
|
||||
Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "MaxACsGenotypes")
|
||||
public Object[][] makeMaxACsForGenotype() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final List<Allele> AA = Arrays.asList(A, A);
|
||||
final List<Allele> AC = Arrays.asList(A, C);
|
||||
final List<Allele> CC = Arrays.asList(C, C);
|
||||
final List<Allele> AG = Arrays.asList(A, G);
|
||||
final List<Allele> GG = Arrays.asList(G, G);
|
||||
final List<Allele> CG = Arrays.asList(C, G);
|
||||
|
||||
final VariantContext vc2 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C)).make();
|
||||
final VariantContext vc3 = new VariantContextBuilder("x","1", 1, 1, Arrays.asList(A, C, G)).make();
|
||||
|
||||
tests.add(new Object[]{vc2, makePL(AA, 0, 10, 10)});
|
||||
tests.add(new Object[]{vc2, makePL(AC, 10, 0, 10)});
|
||||
tests.add(new Object[]{vc2, makePL(CC, 10, 10, 0)});
|
||||
|
||||
// make sure non-informative => 0
|
||||
tests.add(new Object[]{vc2, makePL(AA, 0, 0, 0)});
|
||||
tests.add(new Object[]{vc3, makePL(AA, 0, 0, 0, 0, 0, 0)});
|
||||
|
||||
// multi-allelics
|
||||
tests.add(new Object[]{vc3, makePL(AG, 10, 10, 10, 0, 10, 10)});
|
||||
tests.add(new Object[]{vc3, makePL(CG, 10, 10, 10, 10, 0, 10)});
|
||||
tests.add(new Object[]{vc3, makePL(GG, 10, 10, 10, 10, 10, 0)});
|
||||
|
||||
// deal with non-informatives third alleles
|
||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 10)});
|
||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 10)});
|
||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 10, 0, 0)});
|
||||
tests.add(new Object[]{vc3, makePL(AC, 10, 0, 10, 0, 0, 0)});
|
||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 10)});
|
||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 10)});
|
||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 10, 0, 0)});
|
||||
tests.add(new Object[]{vc3, makePL(CC, 10, 10, 0, 0, 0, 0)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "MaxACsGenotypes")
|
||||
private void testMakeACByGenotype(final VariantContext vcRoot, final Genotype g) {
|
||||
final VariantContext vc = new VariantContextBuilder(vcRoot).genotypes(g).make();
|
||||
|
||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
|
||||
final ExactAFCalculationTestBuilder testBuilder
|
||||
= new ExactAFCalculationTestBuilder(1, vc.getNAlleles()-1, modelType,
|
||||
ExactAFCalculationTestBuilder.PriorType.human);
|
||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
||||
testExpectedACs(vc, maxACsToVisit);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -83,48 +85,86 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the maximum ACs we need to consider for each alt allele
|
||||
*
|
||||
* Walks over the genotypes in VC, and computes for each alt allele the maximum
|
||||
* AC we need to consider in that alt allele dimension. Does the calculation
|
||||
* based on the PLs in each genotype g, choosing to update the max AC for the
|
||||
* alt alleles corresponding to that PL. Only takes the first lowest PL,
|
||||
* if there are multiple genotype configurations with the same PL value. It
|
||||
* takes values in the order of the alt alleles.
|
||||
*
|
||||
* @param vc the variant context we will compute max alt alleles for
|
||||
* @return a vector of max alt alleles, indexed by alt allele, so result[0] is the AC of the
|
||||
* first alt allele.
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected int[] computeMaxACs(final VariantContext vc) {
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
final int[] maxACs = new int[nAlleles-1];
|
||||
final int[] maxACs = new int[vc.getNAlleles()-1];
|
||||
|
||||
for ( int altI = 0; altI < nAlleles-1; altI++ ) {
|
||||
maxACs[altI] = computeMaxAC(vc, altI+1, nAlleles);
|
||||
}
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
updateMaxACs(g, maxACs);
|
||||
|
||||
return maxACs;
|
||||
}
|
||||
|
||||
private int computeMaxAC(final VariantContext vc, final int altI, final int nAlleles) {
|
||||
int maxAC = 0;
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final int gMaxAlt = computeAC(g, altI, nAlleles);
|
||||
maxAC += gMaxAlt;
|
||||
}
|
||||
|
||||
return maxAC;
|
||||
}
|
||||
|
||||
private int computeAC(final Genotype g, final int altI, final int nAlleles) {
|
||||
/**
|
||||
* Update the maximum achievable allele counts in maxAC according to the PLs in g
|
||||
*
|
||||
* Selects the maximum genotype configuration from the PLs in g, and updates
|
||||
* the maxAC for this configure. For example, if the lowest PL is for 0/1, updates
|
||||
* the maxAC for the alt allele 1 by 1. If it's 1/1, update is 2. Works for
|
||||
* many number of alt alleles (determined by length of maxACs).
|
||||
*
|
||||
* If the max PL occurs at 0/0, updates nothing
|
||||
* Note that this function greedily takes the first min PL, so that if 0/1 and 1/1 have
|
||||
* the same PL value, then updates the first one.
|
||||
*
|
||||
* Also, only will update 1 alt allele, so if 0/1 and 0/2 both have the same PL,
|
||||
* then only first one (1) will be updated
|
||||
*
|
||||
* @param g the genotype to update
|
||||
* @param maxACs the max allele count vector for alt alleles (starting at 0 => first alt allele)
|
||||
*/
|
||||
@Requires({
|
||||
"g != null",
|
||||
"maxACs != null",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
private void updateMaxACs(final Genotype g, final int[] maxACs) {
|
||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
||||
|
||||
final int refPL = PLs[0];
|
||||
if ( refPL == 0 ) // if ref is most likely, return 0
|
||||
return 0;
|
||||
int minPLi = 0;
|
||||
int minPL = PLs[0];
|
||||
|
||||
final int homPL = PLs[GenotypeLikelihoods.calculatePLindex(altI, altI)];
|
||||
if (homPL < refPL) // if hom-var is < ref, our max possible is 2
|
||||
return 2;
|
||||
|
||||
for ( int i = 0; i < nAlleles; i++ ) {
|
||||
final int one = i < altI ? i : altI;
|
||||
final int two = i < altI ? altI : i;
|
||||
final int hetPL = PLs[GenotypeLikelihoods.calculatePLindex(one, two)];
|
||||
if ( hetPL < refPL ) // if het has PL < ref, we must check AC = 1
|
||||
return 1;
|
||||
for ( int i = 0; i < PLs.length; i++ ) {
|
||||
if ( PLs[i] < minPL ) {
|
||||
minPL = PLs[i];
|
||||
minPLi = i;
|
||||
}
|
||||
}
|
||||
|
||||
return 0; // in this case REF is the most likely but in fact another allele is best
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(minPLi);
|
||||
updateMaxACs(maxACs, pair.alleleIndex1);
|
||||
updateMaxACs(maxACs, pair.alleleIndex2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple helper. Update max alt alleles maxACs according to the allele index (where 0 == ref)
|
||||
*
|
||||
* If alleleI == 0 => doesn't update anything
|
||||
* else maxACs[alleleI - 1]++
|
||||
*
|
||||
* @param maxACs array of max alt allele ACs
|
||||
* @param alleleI the index (relative to 0) to update a count of 1 in max alt alleles.
|
||||
*/
|
||||
@Requires({
|
||||
"alleleI >= 0",
|
||||
"(alleleI - 1) < maxACs.length",
|
||||
"MathUtils.sum(maxACs) >= 0"})
|
||||
private void updateMaxACs(final int[] maxACs, final int alleleI) {
|
||||
if ( alleleI > 0 )
|
||||
maxACs[alleleI-1]++;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -133,99 +173,99 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
|
||||
// a wrapper around the int array so that we can make it hashable
|
||||
protected static final class ExactACcounts {
|
||||
// a wrapper around the int array so that we can make it hashable
|
||||
protected static final class ExactACcounts {
|
||||
|
||||
protected final int[] counts;
|
||||
private int hashcode = -1;
|
||||
protected final int[] counts;
|
||||
private int hashcode = -1;
|
||||
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(counts);
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(counts[0]);
|
||||
for ( int i = 1; i < counts.length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(counts[i]);
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// This class represents a column in the Exact AC calculation matrix
|
||||
protected static final class ExactACset {
|
||||
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
ExactACcounts ACs = null;
|
||||
|
||||
public MaxLikelihoodSeen() {}
|
||||
|
||||
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
this.ACs = ACs;
|
||||
}
|
||||
|
||||
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] myACcounts = this.ACs.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(counts);
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(counts[0]);
|
||||
for ( int i = 1; i < counts.length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(counts[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// This class represents a column in the Exact AC calculation matrix
|
||||
protected static final class ExactACset {
|
||||
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
ExactACcounts ACs = null;
|
||||
|
||||
public MaxLikelihoodSeen() {}
|
||||
|
||||
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
this.ACs = ACs;
|
||||
}
|
||||
|
||||
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] myACcounts = this.ACs.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue