Function to compute the max achievable AC for each alt allele
-- Additional minor cleanup of ExactAFCalculation
This commit is contained in:
parent
b3cb33a416
commit
7666a58773
|
|
@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.report.GATKReport;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -58,7 +57,7 @@ public class ExactAFCalculationPerformanceTest {
|
|||
final double[] priors = testBuilder.makePriors();
|
||||
|
||||
for ( int[] ACs : makeACs(testBuilder.numAltAlleles, testBuilder.nSamples*2) ) {
|
||||
final VariantContext vc = testBuilder.makeACTest(ACs, nonTypePL);
|
||||
final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL);
|
||||
|
||||
timer.start();
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
|
||||
|
|
@ -115,7 +114,7 @@ public class ExactAFCalculationPerformanceTest {
|
|||
|
||||
final int[] ac = new int[testBuilder.numAltAlleles];
|
||||
ac[0] = 1;
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, 0, nonTypePL);
|
||||
|
||||
for ( int position = 0; position < vc.getNSamples(); position++ ) {
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||
|
|
@ -149,19 +148,12 @@ public class ExactAFCalculationPerformanceTest {
|
|||
|
||||
final int[] ac = new int[testBuilder.numAltAlleles];
|
||||
ac[0] = 1;
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nonTypePL);
|
||||
final Genotype nonInformative = testBuilder.makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 0, 0, 0);
|
||||
|
||||
for ( int nNonInformative = 0; nNonInformative < vc.getNSamples(); nNonInformative++ ) {
|
||||
final VariantContextBuilder vcb = new VariantContextBuilder(vc);
|
||||
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
genotypes.addAll(vc.getGenotypes().subList(0, nNonInformative + 1));
|
||||
genotypes.addAll(Collections.nCopies(vc.getNSamples() - nNonInformative, nonInformative));
|
||||
vcb.genotypes(genotypes);
|
||||
for ( int nNonInformative = 0; nNonInformative < testBuilder.nSamples; nNonInformative++ ) {
|
||||
final VariantContext vc = testBuilder.makeACTest(ac, nNonInformative, nonTypePL);
|
||||
|
||||
timer.start();
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vcb.make(), priors);
|
||||
final AlleleFrequencyCalculationResult result = calc.getLog10PNonRef(vc, priors);
|
||||
final long runtime = timer.getElapsedTimeNano();
|
||||
|
||||
final List<Object> columns = new LinkedList<Object>(coreValues);
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class ExactAFCalculationTestBuilder {
|
||||
|
|
@ -68,7 +70,11 @@ public class ExactAFCalculationTestBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int[] ACs, final int nonTypePL) {
|
||||
public VariantContext makeACTest(final List<Integer> ACs, final int nNonInformative, final int nonTypePL) {
|
||||
return makeACTest(ArrayUtils.toPrimitive(ACs.toArray(new Integer[]{})), nNonInformative, nonTypePL);
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int[] ACs, final int nNonInformative, final int nonTypePL) {
|
||||
final int nChrom = nSamples * 2;
|
||||
|
||||
final int[] nhet = new int[numAltAlleles];
|
||||
|
|
@ -76,7 +82,7 @@ public class ExactAFCalculationTestBuilder {
|
|||
|
||||
for ( int i = 0; i < ACs.length; i++ ) {
|
||||
final double p = ACs[i] / (1.0 * nChrom);
|
||||
nhomvar[i] = (int)Math.floor(nSamples * p * p);
|
||||
nhomvar[i] = (int)Math.floor((nSamples - nNonInformative) * p * p);
|
||||
nhet[i] = ACs[i] - 2 * nhomvar[i];
|
||||
|
||||
if ( nhet[i] < 0 )
|
||||
|
|
@ -87,10 +93,10 @@ public class ExactAFCalculationTestBuilder {
|
|||
if ( calcAC != MathUtils.sum(ACs) )
|
||||
throw new IllegalStateException("calculated AC " + calcAC + " not equal to desired AC " + Utils.join(",", ACs));
|
||||
|
||||
return makeACTest(nhet, nhomvar, nonTypePL);
|
||||
return makeACTest(nhet, nhomvar, nNonInformative, nonTypePL);
|
||||
}
|
||||
|
||||
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nonTypePL) {
|
||||
public VariantContext makeACTest(final int[] nhet, final int[] nhomvar, final int nNonInformative, final int nonTypePL) {
|
||||
List<Genotype> samples = new ArrayList<Genotype>(nSamples);
|
||||
|
||||
for ( int altI = 0; altI < nhet.length; altI++ ) {
|
||||
|
|
@ -100,8 +106,12 @@ public class ExactAFCalculationTestBuilder {
|
|||
samples.add(makePL(GenotypeType.HOM_VAR, nonTypePL, altI+1));
|
||||
}
|
||||
|
||||
final int nRef = (int)(nSamples - MathUtils.sum(nhet) - MathUtils.sum(nhomvar));
|
||||
for ( int i = 0; i < nRef; i++ ) samples.add(makePL(GenotypeType.HOM_REF, nonTypePL, 0));
|
||||
final int[] nonInformativePLs = new int[GenotypeLikelihoods.numLikelihoods(numAltAlleles, 2)];
|
||||
final Genotype nonInformative = makePL(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), nonInformativePLs);
|
||||
samples.addAll(Collections.nCopies(nNonInformative, nonInformative));
|
||||
|
||||
final int nRef = Math.max((int) (nSamples - nNonInformative - MathUtils.sum(nhet) - MathUtils.sum(nhomvar)), 0);
|
||||
samples.addAll(Collections.nCopies(nRef, makePL(GenotypeType.HOM_REF, nonTypePL, 0)));
|
||||
|
||||
samples = samples.subList(0, nSamples);
|
||||
|
||||
|
|
|
|||
|
|
@ -54,12 +54,12 @@ public class GeneralPloidyExactAFCalculation extends ExactAFCalculation {
|
|||
@Override
|
||||
protected VariantContext reduceScope(VariantContext vc) {
|
||||
// don't try to genotype too many alternate alleles
|
||||
if ( vc.getAlternateAlleles().size() > MAX_ALTERNATE_ALLELES_TO_GENOTYPE ) {
|
||||
logger.warn("this tool is currently set to genotype at most " + MAX_ALTERNATE_ALLELES_TO_GENOTYPE + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
if ( vc.getAlternateAlleles().size() > maxAltAlleles) {
|
||||
logger.warn("this tool is currently set to genotype at most " + maxAltAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
||||
final List<Allele> alleles = new ArrayList<Allele>(maxAltAlleles + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE, ploidy));
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, maxAltAlleles, ploidy));
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
builder.alleles(alleles);
|
||||
|
|
|
|||
|
|
@ -345,4 +345,47 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
+ expectedAC_AC + " priors " + Utils.join(",", priors));
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "MaxACsToVisit")
|
||||
public Object[][] makeMaxACsToVisit() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final int nSamples = 10;
|
||||
final ExactAFCalculationTestBuilder.ModelType modelType = ExactAFCalculationTestBuilder.ModelType.DiploidExact;
|
||||
|
||||
for (int nNonInformative = 0; nNonInformative < nSamples - 1; nNonInformative++ ) {
|
||||
final int nChrom = (nSamples - nNonInformative) * 2;
|
||||
for ( int i = 0; i < nChrom; i++ ) {
|
||||
// bi-allelic
|
||||
tests.add(new Object[]{nSamples, Arrays.asList(i), nNonInformative, modelType});
|
||||
|
||||
// tri-allelic
|
||||
for ( int j = 0; j < (nChrom - i); j++)
|
||||
tests.add(new Object[]{nSamples, Arrays.asList(i, j), nNonInformative, modelType});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "MaxACsToVisit")
|
||||
public void testMaxACsToVisit(final int nSamples, final List<Integer> requestedACs, final int nNonInformative, final ExactAFCalculationTestBuilder.ModelType modelType) {
|
||||
final int nAlts = requestedACs.size();
|
||||
final ExactAFCalculationTestBuilder testBuilder
|
||||
= new ExactAFCalculationTestBuilder(nSamples, nAlts, modelType,
|
||||
ExactAFCalculationTestBuilder.PriorType.human);
|
||||
|
||||
final VariantContext vc = testBuilder.makeACTest(requestedACs, nNonInformative, 100);
|
||||
final int[] maxACsToVisit = testBuilder.makeModel().computeMaxACs(vc);
|
||||
|
||||
// this is necessary because cannot ensure that the tester gives us back the requested ACs due
|
||||
// to rounding errors
|
||||
final List<Integer> ACs = new ArrayList<Integer>();
|
||||
for ( final Allele a : vc.getAlternateAlleles() )
|
||||
ACs.add(vc.getCalledChrCount(a));
|
||||
|
||||
for ( int i = 0; i < nAlts; i++ ) {
|
||||
Assert.assertEquals(maxACsToVisit[i], (int)ACs.get(i), "Maximum AC computed wasn't equal to the max possible in the construction for alt allele " + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -102,7 +102,7 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
*/
|
||||
public final AlleleFrequencyCalculationResult getLog10PNonRef(final VariantContext vc,
|
||||
final double[] log10AlleleFrequencyPriors) {
|
||||
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
|
||||
return getLog10PNonRef(vc, log10AlleleFrequencyPriors, new AlleleFrequencyCalculationResult(getMaxAltAlleles()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -183,6 +183,17 @@ public abstract class AlleleFrequencyCalculation implements Cloneable {
|
|||
final boolean assignGenotypes,
|
||||
final int ploidy);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// accessors
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
public int getMaxAltAlleles() {
|
||||
return Math.max(MAX_ALTERNATE_ALLELES_TO_GENOTYPE, MAX_ALTERNATE_ALLELES_FOR_INDELS);
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
//
|
||||
// Print information about the call to the calls log
|
||||
|
|
|
|||
|
|
@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -85,105 +83,149 @@ abstract class ExactAFCalculation extends AlleleFrequencyCalculation {
|
|||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
protected int[] computeMaxACs(final VariantContext vc) {
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
final int[] maxACs = new int[nAlleles-1];
|
||||
|
||||
for ( int altI = 0; altI < nAlleles-1; altI++ ) {
|
||||
maxACs[altI] = computeMaxAC(vc, altI+1, nAlleles);
|
||||
}
|
||||
|
||||
return maxACs;
|
||||
}
|
||||
|
||||
private int computeMaxAC(final VariantContext vc, final int altI, final int nAlleles) {
|
||||
int maxAC = 0;
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final int gMaxAlt = computeAC(g, altI, nAlleles);
|
||||
maxAC += gMaxAlt;
|
||||
}
|
||||
|
||||
return maxAC;
|
||||
}
|
||||
|
||||
private int computeAC(final Genotype g, final int altI, final int nAlleles) {
|
||||
final int[] PLs = g.getLikelihoods().getAsPLs();
|
||||
|
||||
final int refPL = PLs[0];
|
||||
if ( refPL == 0 ) // if ref is most likely, return 0
|
||||
return 0;
|
||||
|
||||
final int homPL = PLs[GenotypeLikelihoods.calculatePLindex(altI, altI)];
|
||||
if (homPL < refPL) // if hom-var is < ref, our max possible is 2
|
||||
return 2;
|
||||
|
||||
for ( int i = 0; i < nAlleles; i++ ) {
|
||||
final int one = i < altI ? i : altI;
|
||||
final int two = i < altI ? altI : i;
|
||||
final int hetPL = PLs[GenotypeLikelihoods.calculatePLindex(one, two)];
|
||||
if ( hetPL < refPL ) // if het has PL < ref, we must check AC = 1
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0; // in this case REF is the most likely but in fact another allele is best
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// protected classes used to store exact model matrix columns
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||
|
||||
// a wrapper around the int array so that we can make it hashable
|
||||
protected static final class ExactACcounts {
|
||||
// a wrapper around the int array so that we can make it hashable
|
||||
protected static final class ExactACcounts {
|
||||
|
||||
protected final int[] counts;
|
||||
private int hashcode = -1;
|
||||
protected final int[] counts;
|
||||
private int hashcode = -1;
|
||||
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(counts);
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(counts[0]);
|
||||
for ( int i = 1; i < counts.length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(counts[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
public ExactACcounts(final int[] counts) {
|
||||
this.counts = counts;
|
||||
}
|
||||
|
||||
// This class represents a column in the Exact AC calculation matrix
|
||||
protected static final class ExactACset {
|
||||
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
public int[] getCounts() {
|
||||
return counts;
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
ExactACcounts ACs = null;
|
||||
|
||||
public MaxLikelihoodSeen() {}
|
||||
|
||||
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
this.ACs = ACs;
|
||||
}
|
||||
|
||||
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] myACcounts = this.ACs.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if ( hashcode == -1 )
|
||||
hashcode = Arrays.hashCode(counts);
|
||||
return hashcode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(counts[0]);
|
||||
for ( int i = 1; i < counts.length; i++ ) {
|
||||
sb.append("/");
|
||||
sb.append(counts[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
// This class represents a column in the Exact AC calculation matrix
|
||||
protected static final class ExactACset {
|
||||
|
||||
// the counts of the various alternate alleles which this column represents
|
||||
final ExactACcounts ACcounts;
|
||||
|
||||
// the column of the matrix
|
||||
final double[] log10Likelihoods;
|
||||
|
||||
int sum = -1;
|
||||
|
||||
public ExactACset(final int size, final ExactACcounts ACcounts) {
|
||||
this.ACcounts = ACcounts;
|
||||
log10Likelihoods = new double[size];
|
||||
Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
// sum of all the non-reference alleles
|
||||
public int getACsum() {
|
||||
if ( sum == -1 ) {
|
||||
sum = 0;
|
||||
for ( int count : ACcounts.getCounts() )
|
||||
sum += count;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
ExactACcounts ACs = null;
|
||||
|
||||
public MaxLikelihoodSeen() {}
|
||||
|
||||
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
this.ACs = ACs;
|
||||
}
|
||||
|
||||
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] myACcounts = this.ACs.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue