Merge pull request #1069 from broadinstitute/vrr_ad_genotype_gvcfs_bugfix
Fix AD propagation when subsetting alleles in non-diploid GenotypeGVCF.
This commit is contained in:
commit
66cf22b28f
|
|
@ -66,9 +66,6 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
|||
|
||||
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
|
||||
|
||||
|
||||
private final static boolean VERBOSE = false;
|
||||
|
||||
protected GeneralPloidyExactAFCalculator() {
|
||||
}
|
||||
|
||||
|
|
@ -229,9 +226,6 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
|||
|
||||
// clean up memory
|
||||
indexesToACset.remove(ACset.getACcounts());
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** removing used set=%s%n", ACset.getACcounts());
|
||||
|
||||
}
|
||||
return newPool;
|
||||
}
|
||||
|
|
@ -301,68 +295,6 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
|||
}
|
||||
|
||||
|
||||
// /**
|
||||
// * Naive combiner of two multiallelic pools - number of alt alleles must be the same.
|
||||
// * Math is generalization of biallelic combiner.
|
||||
// *
|
||||
// * For vector K representing an allele count conformation,
|
||||
// * Pr(D | AC = K) = Sum_G Pr(D|AC1 = G) Pr (D|AC2=K-G) * F(G,K)
|
||||
// * where F(G,K) = choose(m1,[g0 g1 ...])*choose(m2,[...]) / choose(m1+m2,[k1 k2 ...])
|
||||
// * @param originalPool First log-likelihood pool GL vector
|
||||
// * @param yy Second pool GL vector
|
||||
// * @param ploidy1 Ploidy of first pool (# of chromosomes in it)
|
||||
// * @param ploidy2 Ploidy of second pool
|
||||
// * @param numAlleles Number of alleles
|
||||
// * @param log10AlleleFrequencyPriors Array of biallelic priors
|
||||
// * @param resultTracker Af calculation result object
|
||||
// */
|
||||
// public static void combineMultiallelicPoolNaively(CombinedPoolLikelihoods originalPool, double[] yy, int ploidy1, int ploidy2, int numAlleles,
|
||||
// final double[] log10AlleleFrequencyPriors,
|
||||
// final AFCalcResultTracker resultTracker) {
|
||||
///*
|
||||
// final int dim1 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy1);
|
||||
// final int dim2 = GenotypeLikelihoods.numLikelihoods(numAlleles, ploidy2);
|
||||
//
|
||||
// if (dim1 != originalPool.getLength() || dim2 != yy.length)
|
||||
// throw new ReviewedGATKException("BUG: Inconsistent vector length");
|
||||
//
|
||||
// if (ploidy2 == 0)
|
||||
// return;
|
||||
//
|
||||
// final int newPloidy = ploidy1 + ploidy2;
|
||||
//
|
||||
// // Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
|
||||
// // and L2(K) = Pr(D|AC2=K) * choose(m2,K)
|
||||
// GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
|
||||
// final double[] x = originalPool.getLikelihoodsAsVector(true);
|
||||
// while(firstIterator.hasNext()) {
|
||||
// x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
|
||||
// firstIterator.next();
|
||||
// }
|
||||
//
|
||||
// GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
|
||||
// final double[] y = yy.clone();
|
||||
// while(secondIterator.hasNext()) {
|
||||
// y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
|
||||
// secondIterator.next();
|
||||
// }
|
||||
//
|
||||
// // initialize output to -log10(choose(m1+m2,[k1 k2...])
|
||||
// final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
|
||||
// final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
|
||||
//
|
||||
//
|
||||
// // Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
|
||||
// while(outputIterator.hasNext()) {
|
||||
// final ExactACset set = new ExactACset(1, new ExactACcounts(outputIterator.getCurrentAltVector()));
|
||||
// double likelihood = computeLofK(set, x,y, log10AlleleFrequencyPriors, numAlleles, ploidy1, ploidy2, result);
|
||||
//
|
||||
// originalPool.add(likelihood, set, outputIterator.getLinearIndex());
|
||||
// outputIterator.next();
|
||||
// }
|
||||
//*/
|
||||
// }
|
||||
|
||||
/**
|
||||
* Compute likelihood of a particular AC conformation and update AFresult object
|
||||
* @param set Set of AC counts to compute
|
||||
|
|
@ -479,8 +411,9 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
|||
* for a sample.
|
||||
* @param allelesToUse alleles to subset
|
||||
* @param assignGenotypes true: assign hard genotypes, false: leave as no-call
|
||||
* @return GenotypesContext with new PLs
|
||||
* @return GenotypesContext with new PLs and AD.
|
||||
*/
|
||||
@Override
|
||||
public GenotypesContext subsetAlleles(final VariantContext vc, final int defaultPloidy,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes) {
|
||||
|
|
@ -544,8 +477,7 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
|
|||
}
|
||||
}
|
||||
|
||||
return newGTs;
|
||||
|
||||
return GATKVariantContextUtils.fixADFromSubsettedAlleles(newGTs, vc, allelesToUse);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -514,4 +514,15 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
spec.disableShadowBCF();
|
||||
executeTest("testGenotypingSpanningDeletionOverSpan", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBadADPropagationHaploidBugTest() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "ad-bug-input.vcf",
|
||||
1,
|
||||
Arrays.asList("ba04d9401d330ed8e31fdacc8b720d12"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBadADPropagationHaploidBugTest", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -1214,7 +1214,10 @@ public class GATKVariantContextUtils {
|
|||
* @param allelesToUse the new (sub)set of alleles to use
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||
public static GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||
if (originalGs == null) throw new IllegalArgumentException("the original Gs cannot be null");
|
||||
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null");
|
||||
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use list cannot be null");
|
||||
|
||||
// the bitset representing the allele indexes we want to keep
|
||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
||||
|
|
|
|||
Loading…
Reference in New Issue