Function to compute the max achievable AC for each alt allele

-- Additional minor cleanup of ExactAFCalculation
This commit is contained in:
Mark DePristo 2012-10-04 19:38:48 -07:00
parent b3cb33a416
commit 7666a58773
6 changed files with 212 additions and 114 deletions

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -58,7 +57,7 @@ public class ExactAFCalculationPerformanceTest {
final double[] priors = testBuilder.makePriors();
for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) {
final VariantContext vc = testBuilder.makeACTest(ACs, nonTypePL);
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
timer.start();
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
@ -115,7 +114,7 @@ public class ExactAFCalculationPerformanceTest {
final int[] ac = new int[testBuilder.numAltAlleles];
ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
final VariantContext vc = testBuilder.makeACTest(ac, 0, nonTypePL);
for ( int position = 0; position < vc.getNSamples(); position++ ) {
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
@ -149,19 +148,12 @@ public class ExactAFCalculationPerformanceTest {
final int[] ac = new int[testBuilder.numAltAlleles];
ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) {
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
final List<Genotype> genotypes = new ArrayList<Genotype>();
genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1));
genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative));
vcb.genotypes(genotypes);
for ( int nNonInformative = 0; nNonInformative < testBuilder.nSamples; nNonInformative++ ) {
final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL);
timer.start();
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors);
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
final long runtime = timer.getElapsedTimeNano();
final List<Object> columns = new LinkedList<Object>(coreValues);

View File

@ -1,11 +1,13 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class ExactAFCalculationTestBuilder {
@ -68,7 +70,11 @@ public class ExactAFCalculationTestBuilder {
}
}
public VariantContext makeACTest(final int[] ACs, final int nonTypePL) {
public VariantContext makeACTest(final List<Integer> ACs, final int nNonInformative, final int nonTypePL) {
return makeACTest(ArrayUtils.toPrimitive(ACs.toArray(new Integer[]{})), nNonInformative, nonTypePL);
}
public VariantContext makeACTest(final int[] ACs, final int nNonInformative, final int nonTypePL) {
final int nChrom = nSamples * 2;
final int[] nhet = new int[numAltAlleles];
@ -76,7 +82,7 @@ public class ExactAFCalculationTestBuilder {
for ( int i = 0; i < ACs.length; i++ ) {
final double p = ACs[i] / (1.0 * nChrom);
nhomvar[i] = (int)Math.floor(nSamples * p * p);
nhomvar[i] = (int)Math.floor((nSamples - nNonInformative) * p * p);
nhet[i] = ACs[i] - 2 * nhomvar[i];
if ( nhet[i] < 0 )
@ -87,10 +93,10 @@ public class ExactAFCalculationTestBuilder {
if ( calcAC != MathUtils.sum(ACs) )
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs));
return makeACTest(nhet, nhomvar, nonTypePL);
return makeACTest(nhet, nhomvar, nNonInformative, nonTypePL);
}
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nonTypePL) {
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nNonInformative, final int nonTypePL) {
List<Genotype> samples = new ArrayList<Genotype>(nSamples);
for ( int altI = 0; altI < nhet.length; altI++ ) {
@ -100,8 +106,12 @@ public class ExactAFCalculationTestBuilder {
samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1));
}
final int nRef = (int)(nSamples - MathUtils.sum(nhet) - MathUtils.sum(nhomvar));
for ( int i = 0; i < nRef; i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL, 0));
final int[] nonInformativePLs = new int[GenotypeLikelihoods.numLikelihoods(numAltAlleles, 2)];
final Genotype nonInformative = makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), nonInformativePLs);
samples.addAll(Collections.nCopies(nNonInformative, nonInformative));
final int nRef = Math.max((int) (nSamples - nNonInformative - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)), 0);
samples.addAll(Collections.nCopies(nRef, makePL(GenotypeType.HOM_REF, nonTypePL, 0)));
samples = samples.subList(0, nSamples);

View File

@ -54,12 +54,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
@Override
protected VariantContext reduceScope(VariantContext vc) {
// don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
if ( vc.getAlternateAlleles().size() > maxAltAlleles) {
logger.warn("this tool is currently set to genotype at most " + maxAltAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
final List<Allele> alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
final List<Allele> alleles = new ArrayList<Allele>(maxAltAlleles + 1);
alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy));
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, maxAltAlleles, ploidy));
VariantContextBuilder builder = new VariantContextBuilder(vc);
builder.alleles(alleles);

View File

@ -345,4 +345,47 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
+ expectedAC_AC + " priors " + Utils.join(",", priors));
}
}
@DataProvider(name = "MaxACsToVisit")
public Object[][] makeMaxACsToVisit() {
List<Object[]> tests = new ArrayList<Object[]>();
final int nSamples = 10;
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
final int nChrom = (nSamples - nNonInformative) * 2;
for ( int i = 0; i < nChrom; i++ ) {
// bi-allelic
tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, modelType});
// tri-allelic
for ( int j = 0; j < (nChrom - i); j++)
tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, modelType});
}
}
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, dataProvider = "MaxACsToVisit")
public void testMaxACsToVisit(final int nSamples, final List<Integer> requestedACs, final int nNonInformative, final ExactAFCalculationTestBuilder.ModelType modelType) {
final int nAlts = requestedACs.size();
final ExactAFCalculationTestBuilder testBuilder
= new ExactAFCalculationTestBuilder(nSamples, nAlts, modelType,
ExactAFCalculationTestBuilder.PriorType.human);
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
// this is necessary because cannot ensure that the tester gives us back the requested ACs due
// to rounding errors
final List<Integer> ACs = new ArrayList<Integer>();
for ( final Allele a : vc.getAlternateAlleles() )
ACs.add(vc.getCalledChrCount(a));
for ( int i = 0; i < nAlts; i++ ) {
Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i);
}
}
}

View File

@ -102,7 +102,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
*/
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) {
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles()));
}
/**
@ -183,6 +183,17 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
final boolean assignGenotypes,
final int ploidy);
// ---------------------------------------------------------------------------
//
// accessors
//
// ---------------------------------------------------------------------------
public int getMaxAltAlleles() {
return Math.max(MAX_ALTERNATE_ALLELES_TO_GENOTYPE, MAX_ALTERNATE_ALLELES_FOR_INDELS);
}
// ---------------------------------------------------------------------------
//
// Print information about the call to the calls log

View File

@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.File;
import java.io.PrintStream;
@ -85,105 +83,149 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
return genotypeLikelihoods;
}
protected int[] computeMaxACs(final VariantContext vc) {
final int nAlleles = vc.getNAlleles();
final int[] maxACs = new int[nAlleles-1];
for ( int altI = 0; altI < nAlleles-1; altI++ ) {
maxACs[altI] = computeMaxAC(vc, altI+1, nAlleles);
}
return maxACs;
}
private int computeMaxAC(final VariantContext vc, final int altI, final int nAlleles) {
int maxAC = 0;
for ( final Genotype g : vc.getGenotypes() ) {
final int gMaxAlt = computeAC(g, altI, nAlleles);
maxAC += gMaxAlt;
}
return maxAC;
}
private int computeAC(final Genotype g, final int altI, final int nAlleles) {
final int[] PLs = g.getLikelihoods().getAsPLs();
final int refPL = PLs[0];
if ( refPL == 0 ) // if ref is most likely, return 0
return 0;
final int homPL = PLs[GenotypeLikelihoods.calculatePLindex(altI, altI)];
if (homPL < refPL) // if hom-var is < ref, our max possible is 2
return 2;
for ( int i = 0; i < nAlleles; i++ ) {
final int one = i < altI ? i : altI;
final int two = i < altI ? altI : i;
final int hetPL = PLs[GenotypeLikelihoods.calculatePLindex(one, two)];
if ( hetPL < refPL ) // if het has PL < ref, we must check AC = 1
return 1;
}
return 0; // in this case REF is the most likely but in fact another allele is best
}
// -------------------------------------------------------------------------------------
//
// protected classes used to store exact model matrix columns
//
// -------------------------------------------------------------------------------------
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
// a wrapper around the int array so that we can make it hashable
protected static final class ExactACcounts {
// a wrapper around the int array so that we can make it hashable
protected static final class ExactACcounts {
protected final int[] counts;
private int hashcode = -1;
protected final int[] counts;
private int hashcode = -1;
public ExactACcounts(final int[] counts) {
this.counts = counts;
}
public int[] getCounts() {
return counts;
}
@Override
public boolean equals(Object obj) {
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
}
@Override
public int hashCode() {
if ( hashcode == -1 )
hashcode = Arrays.hashCode(counts);
return hashcode;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append(counts[0]);
for ( int i = 1; i < counts.length; i++ ) {
sb.append("/");
sb.append(counts[i]);
}
return sb.toString();
}
public ExactACcounts(final int[] counts) {
this.counts = counts;
}
// This class represents a column in the Exact AC calculation matrix
protected static final class ExactACset {
// the counts of the various alternate alleles which this column represents
final ExactACcounts ACcounts;
// the column of the matrix
final double[] log10Likelihoods;
int sum = -1;
public ExactACset(final int size, final ExactACcounts ACcounts) {
this.ACcounts = ACcounts;
log10Likelihoods = new double[size];
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
}
// sum of all the non-reference alleles
public int getACsum() {
if ( sum == -1 ) {
sum = 0;
for ( int count : ACcounts.getCounts() )
sum += count;
}
return sum;
}
public boolean equals(Object obj) {
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
}
public int[] getCounts() {
return counts;
}
protected static final class MaxLikelihoodSeen {
double maxLog10L = Double.NEGATIVE_INFINITY;
ExactACcounts ACs = null;
public MaxLikelihoodSeen() {}
public void update(final double maxLog10L, final ExactACcounts ACs) {
this.maxLog10L = maxLog10L;
this.ACs = ACs;
}
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
public boolean isLowerAC(final ExactACcounts otherACs) {
final int[] myACcounts = this.ACs.getCounts();
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < myACcounts.length; i++ ) {
if ( myACcounts[i] > otherACcounts[i] )
return false;
}
return true;
}
@Override
public boolean equals(Object obj) {
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
}
@Override
public int hashCode() {
if ( hashcode == -1 )
hashcode = Arrays.hashCode(counts);
return hashcode;
}
@Override
public String toString() {
StringBuffer sb = new StringBuffer();
sb.append(counts[0]);
for ( int i = 1; i < counts.length; i++ ) {
sb.append("/");
sb.append(counts[i]);
}
return sb.toString();
}
}
// This class represents a column in the Exact AC calculation matrix
protected static final class ExactACset {
// the counts of the various alternate alleles which this column represents
final ExactACcounts ACcounts;
// the column of the matrix
final double[] log10Likelihoods;
int sum = -1;
public ExactACset(final int size, final ExactACcounts ACcounts) {
this.ACcounts = ACcounts;
log10Likelihoods = new double[size];
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
}
// sum of all the non-reference alleles
public int getACsum() {
if ( sum == -1 ) {
sum = 0;
for ( int count : ACcounts.getCounts() )
sum += count;
}
return sum;
}
public boolean equals(Object obj) {
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
}
}
protected static final class MaxLikelihoodSeen {
double maxLog10L = Double.NEGATIVE_INFINITY;
ExactACcounts ACs = null;
public MaxLikelihoodSeen() {}
public void update(final double maxLog10L, final ExactACcounts ACs) {
this.maxLog10L = maxLog10L;
this.ACs = ACs;
}
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
public boolean isLowerAC(final ExactACcounts otherACs) {
final int[] myACcounts = this.ACs.getCounts();
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < myACcounts.length; i++ ) {
if ( myACcounts[i] > otherACcounts[i] )
return false;
}
return true;
}
}
}