Merge pull request #1069 from broadinstitute/vrr_ad_genotype_gvcfs_bugfix

Fix AD propagation when subsetting alleles in non-diploid GenotypeGVCF.
This commit is contained in:
Valentin Ruano Rubio 2015-07-22 18:53:43 -04:00
commit 66cf22b28f
3 changed files with 18 additions and 72 deletions

View File

@ -66,9 +66,6 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
private final static boolean VERBOSE = false;
protected GeneralPloidyExactAFCalculator() {
}
@ -229,9 +226,6 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
// clean up memory
indexesToACset.remove(ACset.getACcounts());
if ( VERBOSE )
System.out.printf(" *** removing used set=%s%n", ACset.getACcounts());
}
return newPool;
}
@ -301,68 +295,6 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
}
// /**
// * Naive combiner of two multiallelic pools - number of alt alleles must be the same.
// * Math is generalization of biallelic combiner.
// *
// * For vector K representing an allele count conformation,
// * Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K)
// * where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...])
// * @param originalPool First log-likelihood pool GL vector
// * @param yy Second pool GL vector
// * @param ploidy1 Ploidy of first pool (# of chromosomes in it)
// * @param ploidy2 Ploidy of second pool
// * @param numAlleles Number of alleles
// * @param log10AlleleFrequencyPriors Array of biallelic priors
// * @param resultTracker Af calculation result object
// */
// public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles,
// final double[] log10AlleleFrequencyPriors,
// final AFCalcResultTracker resultTracker) {
///*
// final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1);
// final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2);
//
// if (dim1 != originalPool.getLength() || dim2 != yy.length)
// throw new ReviewedGATKException("BUG: Inconsistent vector length");
//
// if (ploidy2 == 0)
// return;
//
// final int newPloidy = ploidy1 + ploidy2;
//
// // Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// // and L2(K) = Pr(D|AC2=K) * choose(m2,K)
// GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
// final double[] x = originalPool.getLikelihoodsAsVector(true);
// while(firstIterator.hasNext()) {
// x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
// firstIterator.next();
// }
//
// GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
// final double[] y = yy.clone();
// while(secondIterator.hasNext()) {
// y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
// secondIterator.next();
// }
//
// // initialize output to -log10(choose(m1+m2,[k1 k2...])
// final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
// final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
//
//
// // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
// while(outputIterator.hasNext()) {
// final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector()));
// double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result);
//
// originalPool.add(likelihood, set, outputIterator.getLinearIndex());
// outputIterator.next();
// }
//*/
// }
/**
* Compute likelihood of a particular AC conformation and update AFresult object
* @param set Set of AC counts to compute
@ -479,8 +411,9 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
* for a sample.
* @param allelesToUse alleles to subset
* @param assignGenotypes true: assign hard genotypes, false: leave as no-call
* @return GenotypesContext with new PLs
* @return GenotypesContext with new PLs and AD.
*/
@Override
public GenotypesContext subsetAlleles(final VariantContext vc, final int defaultPloidy,
final List<Allele> allelesToUse,
final boolean assignGenotypes) {
@ -544,8 +477,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
}
}
return newGTs;
return GATKVariantContextUtils.fixADFromSubsettedAlleles(newGTs, vc, allelesToUse);
}
/**

View File

@ -514,4 +514,15 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
spec.disableShadowBCF();
executeTest("testGenotypingSpanningDeletionOverSpan", spec);
}
@Test(enabled = true)
public void testBadADPropagationHaploidBugTest() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "ad-bug-input.vcf",
1,
Arrays.asList("ba04d9401d330ed8e31fdacc8b720d12"));
spec.disableShadowBCF();
executeTest("testBadADPropagationHaploidBugTest", spec);
}
}

View File

@ -1214,7 +1214,10 @@ public class GATKVariantContextUtils {
* @param allelesToUse the new (sub)set of alleles to use
* @return a new non-null GenotypesContext
*/
static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
public static GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
if (originalGs == null) throw new IllegalArgumentException("the original Gs cannot be null");
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null");
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use list cannot be null");
// the bitset representing the allele indexes we want to keep
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);