Function to compute the max achievable AC for each alt allele

-- Additional minor cleanup of ExactAFCalculation
This commit is contained in:
Mark DePristo 2012-10-04 19:38:48 -07:00
parent b3cb33a416
commit 7666a58773
6 changed files with 212 additions and 114 deletions

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -58,7 +57,7 @@ public class ExactAFCalculationPerformanceTest {
final double[] priors = testBuilder.makePriors(); final double[] priors = testBuilder.makePriors();
for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) { for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) {
final VariantContext vc = testBuilder.makeACTest(ACs, nonTypePL); final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
timer.start(); timer.start();
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors); final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
@ -115,7 +114,7 @@ public class ExactAFCalculationPerformanceTest {
final int[] ac = new int[testBuilder.numAltAlleles]; final int[] ac = new int[testBuilder.numAltAlleles];
ac[0] = 1; ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL); final VariantContext vc = testBuilder.makeACTest(ac, 0, nonTypePL);
for ( int position = 0; position < vc.getNSamples(); position++ ) { for ( int position = 0; position < vc.getNSamples(); position++ ) {
final VariantContextBuilder vcb = new VariantContextBuilder(vc); final VariantContextBuilder vcb = new VariantContextBuilder(vc);
@ -149,19 +148,12 @@ public class ExactAFCalculationPerformanceTest {
final int[] ac = new int[testBuilder.numAltAlleles]; final int[] ac = new int[testBuilder.numAltAlleles];
ac[0] = 1; ac[0] = 1;
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) { for ( int nNonInformative = 0; nNonInformative < testBuilder.nSamples; nNonInformative++ ) {
final VariantContextBuilder vcb = new VariantContextBuilder(vc); final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL);
final List<Genotype> genotypes = new ArrayList<Genotype>();
genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1));
genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative));
vcb.genotypes(genotypes);
timer.start(); timer.start();
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors); final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
final long runtime = timer.getElapsedTimeNano(); final long runtime = timer.getElapsedTimeNano();
final List<Object> columns = new LinkedList<Object>(coreValues); final List<Object> columns = new LinkedList<Object>(coreValues);

View File

@ -1,11 +1,13 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
public class ExactAFCalculationTestBuilder { public class ExactAFCalculationTestBuilder {
@ -68,7 +70,11 @@ public class ExactAFCalculationTestBuilder {
} }
} }
public VariantContext makeACTest(final int[] ACs, final int nonTypePL) { public VariantContext makeACTest(final List<Integer> ACs, final int nNonInformative, final int nonTypePL) {
return makeACTest(ArrayUtils.toPrimitive(ACs.toArray(new Integer[]{})), nNonInformative, nonTypePL);
}
public VariantContext makeACTest(final int[] ACs, final int nNonInformative, final int nonTypePL) {
final int nChrom = nSamples * 2; final int nChrom = nSamples * 2;
final int[] nhet = new int[numAltAlleles]; final int[] nhet = new int[numAltAlleles];
@ -76,7 +82,7 @@ public class ExactAFCalculationTestBuilder {
for ( int i = 0; i < ACs.length; i++ ) { for ( int i = 0; i < ACs.length; i++ ) {
final double p = ACs[i] / (1.0 * nChrom); final double p = ACs[i] / (1.0 * nChrom);
nhomvar[i] = (int)Math.floor(nSamples * p * p); nhomvar[i] = (int)Math.floor((nSamples - nNonInformative) * p * p);
nhet[i] = ACs[i] - 2 * nhomvar[i]; nhet[i] = ACs[i] - 2 * nhomvar[i];
if ( nhet[i] < 0 ) if ( nhet[i] < 0 )
@ -87,10 +93,10 @@ public class ExactAFCalculationTestBuilder {
if ( calcAC != MathUtils.sum(ACs) ) if ( calcAC != MathUtils.sum(ACs) )
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs)); throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs));
return makeACTest(nhet, nhomvar, nonTypePL); return makeACTest(nhet, nhomvar, nNonInformative, nonTypePL);
} }
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nonTypePL) { public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nNonInformative, final int nonTypePL) {
List<Genotype> samples = new ArrayList<Genotype>(nSamples); List<Genotype> samples = new ArrayList<Genotype>(nSamples);
for ( int altI = 0; altI < nhet.length; altI++ ) { for ( int altI = 0; altI < nhet.length; altI++ ) {
@ -100,8 +106,12 @@ public class ExactAFCalculationTestBuilder {
samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1)); samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1));
} }
final int nRef = (int)(nSamples - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)); final int[] nonInformativePLs = new int[GenotypeLikelihoods.numLikelihoods(numAltAlleles, 2)];
for ( int i = 0; i < nRef; i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL, 0)); final Genotype nonInformative = makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), nonInformativePLs);
samples.addAll(Collections.nCopies(nNonInformative, nonInformative));
final int nRef = Math.max((int) (nSamples - nNonInformative - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)), 0);
samples.addAll(Collections.nCopies(nRef, makePL(GenotypeType.HOM_REF, nonTypePL, 0)));
samples = samples.subList(0, nSamples); samples = samples.subList(0, nSamples);

View File

@ -54,12 +54,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
@Override @Override
protected VariantContext reduceScope(VariantContext vc) { protected VariantContext reduceScope(VariantContext vc) {
// don't try to genotype too many alternate alleles // don't try to genotype too many alternate alleles
if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) { if ( vc.getAlternateAlleles().size() > maxAltAlleles) {
logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument"); logger.warn("this tool is currently set to genotype at most " + maxAltAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
final List<Allele> alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1); final List<Allele> alleles = new ArrayList<Allele>(maxAltAlleles + 1);
alleles.add(vc.getReference()); alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy)); alleles.addAll(chooseMostLikelyAlternateAlleles(vc, maxAltAlleles, ploidy));
VariantContextBuilder builder = new VariantContextBuilder(vc); VariantContextBuilder builder = new VariantContextBuilder(vc);
builder.alleles(alleles); builder.alleles(alleles);

View File

@ -345,4 +345,47 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
+ expectedAC_AC + " priors " + Utils.join(",", priors)); + expectedAC_AC + " priors " + Utils.join(",", priors));
} }
} }
@DataProvider(name = "MaxACsToVisit")
public Object[][] makeMaxACsToVisit() {
List<Object[]> tests = new ArrayList<Object[]>();
final int nSamples = 10;
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
final int nChrom = (nSamples - nNonInformative) * 2;
for ( int i = 0; i < nChrom; i++ ) {
// bi-allelic
tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, modelType});
// tri-allelic
for ( int j = 0; j < (nChrom - i); j++)
tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, modelType});
}
}
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, dataProvider = "MaxACsToVisit")
public void testMaxACsToVisit(final int nSamples, final List<Integer> requestedACs, final int nNonInformative, final ExactAFCalculationTestBuilder.ModelType modelType) {
final int nAlts = requestedACs.size();
final ExactAFCalculationTestBuilder testBuilder
= new ExactAFCalculationTestBuilder(nSamples, nAlts, modelType,
ExactAFCalculationTestBuilder.PriorType.human);
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
// this is necessary because cannot ensure that the tester gives us back the requested ACs due
// to rounding errors
final List<Integer> ACs = new ArrayList<Integer>();
for ( final Allele a : vc.getAlternateAlleles() )
ACs.add(vc.getCalledChrCount(a));
for ( int i = 0; i < nAlts; i++ ) {
Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i);
}
}
} }

View File

@ -102,7 +102,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
*/ */
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc, public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
final double[] log10AlleleFrequencyPriors) { final double[] log10AlleleFrequencyPriors) {
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE)); return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles()));
} }
/** /**
@ -183,6 +183,17 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
final boolean assignGenotypes, final boolean assignGenotypes,
final int ploidy); final int ploidy);
// ---------------------------------------------------------------------------
//
// accessors
//
// ---------------------------------------------------------------------------
public int getMaxAltAlleles() {
return Math.max(MAX_ALTERNATE_ALLELES_TO_GENOTYPE, MAX_ALTERNATE_ALLELES_FOR_INDELS);
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// //
// Print information about the call to the calls log // Print information about the call to the calls log

View File

@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import java.io.File; import java.io.File;
import java.io.PrintStream; import java.io.PrintStream;
@ -85,16 +83,60 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
return genotypeLikelihoods; return genotypeLikelihoods;
} }
protected int[] computeMaxACs(final VariantContext vc) {
final int nAlleles = vc.getNAlleles();
final int[] maxACs = new int[nAlleles-1];
for ( int altI = 0; altI < nAlleles-1; altI++ ) {
maxACs[altI] = computeMaxAC(vc, altI+1, nAlleles);
}
return maxACs;
}
private int computeMaxAC(final VariantContext vc, final int altI, final int nAlleles) {
int maxAC = 0;
for ( final Genotype g : vc.getGenotypes() ) {
final int gMaxAlt = computeAC(g, altI, nAlleles);
maxAC += gMaxAlt;
}
return maxAC;
}
private int computeAC(final Genotype g, final int altI, final int nAlleles) {
final int[] PLs = g.getLikelihoods().getAsPLs();
final int refPL = PLs[0];
if ( refPL == 0 ) // if ref is most likely, return 0
return 0;
final int homPL = PLs[GenotypeLikelihoods.calculatePLindex(altI, altI)];
if (homPL < refPL) // if hom-var is < ref, our max possible is 2
return 2;
for ( int i = 0; i < nAlleles; i++ ) {
final int one = i < altI ? i : altI;
final int two = i < altI ? altI : i;
final int hetPL = PLs[GenotypeLikelihoods.calculatePLindex(one, two)];
if ( hetPL < refPL ) // if het has PL < ref, we must check AC = 1
return 1;
}
return 0; // in this case REF is the most likely but in fact another allele is best
}
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
// //
// protected classes used to store exact model matrix columns // protected classes used to store exact model matrix columns
// //
// ------------------------------------------------------------------------------------- // -------------------------------------------------------------------------------------
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
// a wrapper around the int array so that we can make it hashable // a wrapper around the int array so that we can make it hashable
protected static final class ExactACcounts { protected static final class ExactACcounts {
protected final int[] counts; protected final int[] counts;
private int hashcode = -1; private int hashcode = -1;
@ -129,10 +171,10 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
} }
return sb.toString(); return sb.toString();
} }
} }
// This class represents a column in the Exact AC calculation matrix // This class represents a column in the Exact AC calculation matrix
protected static final class ExactACset { protected static final class ExactACset {
// the counts of the various alternate alleles which this column represents // the counts of the various alternate alleles which this column represents
final ExactACcounts ACcounts; final ExactACcounts ACcounts;
@ -161,9 +203,9 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
public boolean equals(Object obj) { public boolean equals(Object obj) {
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
} }
} }
protected static final class MaxLikelihoodSeen { protected static final class MaxLikelihoodSeen {
double maxLog10L = Double.NEGATIVE_INFINITY; double maxLog10L = Double.NEGATIVE_INFINITY;
ExactACcounts ACs = null; ExactACcounts ACs = null;
@ -185,5 +227,5 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
} }
return true; return true;
} }
} }
} }