SelectVariants works with non-diploids

This commit is contained in:
Ron Levine 2016-06-13 11:39:46 -04:00
parent d3862459e9
commit 427645162b
10 changed files with 372 additions and 192 deletions

View File

@ -110,7 +110,7 @@ public abstract class DiploidExactAFCalculator extends ExactAFCalculator {
@Override @Override
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) { protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); return GATKVariantContextUtils.subsetAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
} }
@Override @Override
@ -346,8 +346,8 @@ public abstract class DiploidExactAFCalculator extends ExactAFCalculator {
if (defaultPloidy != 2) if (defaultPloidy != 2)
throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy); throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy);
return allelesToUse.size() == 1 return allelesToUse.size() == 1
? GATKVariantContextUtils.subsetToRefOnly(vc, 2) ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy)
: GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, : GATKVariantContextUtils.subsetAlleles(vc, allelesToUse,
assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL); assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
} }
} }

View File

@ -165,7 +165,7 @@ public class FamilyLikelihoodsUtils {
//final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors); //final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
//update genotype types based on posteriors //update genotype types based on posteriors
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), builder, GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), genotype.getPloidy(), builder,
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles()); GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY, builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,

View File

@ -153,7 +153,7 @@ public class PosteriorLikelihoodsUtils {
final GenotypeBuilder builder = new GenotypeBuilder(vc1.getGenotype(genoIdx)); final GenotypeBuilder builder = new GenotypeBuilder(vc1.getGenotype(genoIdx));
builder.phased(vc1.getGenotype(genoIdx).isPhased()); builder.phased(vc1.getGenotype(genoIdx).isPhased());
if ( posteriors.get(genoIdx) != null ) { if ( posteriors.get(genoIdx) != null ) {
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc1.getAlleles(), builder, GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc1.getAlleles(), vc1.getMaxPloidy(2), builder,
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles()); GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles());
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY, builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs())); Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs()));

View File

@ -304,7 +304,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -selectType INDEL --variant " + testFile + " -o %s --no_cmdline_in_header --minIndelSize 2", "-T SelectVariants -R " + b36KGReference + " -selectType INDEL --variant " + testFile + " -o %s --no_cmdline_in_header --minIndelSize 2",
1, 1,
Arrays.asList("ed9dc00d0551630a2eed9e81a2a357d3") Arrays.asList("ad0965edb1dbd30060afd21ba9f11bf7")
); );
executeTest("testMinIndelLengthSelection--" + testFile, spec); executeTest("testMinIndelLengthSelection--" + testFile, spec);
@ -395,7 +395,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header", "-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
1, 1,
Arrays.asList("c78a65b41edbdd386211042e8f65220b") Arrays.asList("1fc77d7f47e75a24222a358c69de7f3d")
); );
executeTest("testNoGTs--" + testFile, spec); executeTest("testNoGTs--" + testFile, spec);
@ -606,7 +606,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -IDs " + idFile + " --variant " + testFile), baseTestString(" -IDs " + idFile + " --variant " + testFile),
1, 1,
Arrays.asList("c6632b63617162455f02670174a2322a") Arrays.asList("da1117cba380345c622a6d8e52c2270b")
); );
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testKeepSelectionID--" + testFile, spec); executeTest("testKeepSelectionID--" + testFile, spec);
@ -641,7 +641,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -xlSelectType SNP --variant " + testFile + " -o %s --no_cmdline_in_header", "-T SelectVariants -R " + b36KGReference + " -xlSelectType SNP --variant " + testFile + " -o %s --no_cmdline_in_header",
1, 1,
Arrays.asList("ed9dc00d0551630a2eed9e81a2a357d3") Arrays.asList("ad0965edb1dbd30060afd21ba9f11bf7")
); );
executeTest("testExcludeSelectionType--" + testFile, spec); executeTest("testExcludeSelectionType--" + testFile, spec);
@ -782,7 +782,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates", "-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates",
1, 1,
ReviewedGATKException.class); Arrays.asList("7880f8a1dfae1804998b6a994574e734"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testSACNonDiploid", spec); executeTest("testSACNonDiploid", spec);
} }
@ -838,4 +838,43 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testMaxNoCall0_5", spec); executeTest("testMaxNoCall0_5", spec);
} }
@Test
public void testHaploid() {
final String testfile = privateTestDir + "haploid-multisample.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn HG00610 -select 'DP > 7'",
1,
Arrays.asList("bc6caa81836f4c94a1216babd0c1ac72"));
spec.disableShadowBCF();
executeTest("testHaploid", spec);
}
@Test
public void testTetraploid() {
final String testfile = privateTestDir + "tetraploid-multisample.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA18486 -select 'DP > 19'",
1,
Arrays.asList("4fcfa5e0ba5d39ca9f0593aa0c0f7a63"));
spec.disableShadowBCF();
executeTest("testTetraploid", spec);
}
@Test
public void testTetraDiploid() {
final String testfile = privateTestDir + "tetra-diploid.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12878 -select 'DP > 48' -trimAlternates",
1,
Arrays.asList("709782f7a07cd500d41370e6a275fcdf"));
spec.disableShadowBCF();
executeTest("testTetraDiploid", spec);
}
} }

View File

@ -95,7 +95,7 @@ public class SelectVariantsParallelIntegrationTest extends WalkerTest {
{ // new tests on b37 using testdir VCF { // new tests on b37 using testdir VCF
final String testfile = privateTestDir + "NA12878.hg19.example1.vcf"; final String testfile = privateTestDir + "NA12878.hg19.example1.vcf";
final String args = "-select 'DP > 30' -V " + testfile; final String args = "-select 'DP > 30' -V " + testfile;
new ParallelSelectTestProvider(b37KGReference, args, "64f9258e9e3024b6361abbeeeefafee9", nt); new ParallelSelectTestProvider(b37KGReference, args, "51645037428729c3a9fa0e25fc2104ad", nt);
} }
{ // AD and PL decoding race condition { // AD and PL decoding race condition
final String testfile = privateTestDir + "race_condition.vcf"; final String testfile = privateTestDir + "race_condition.vcf";

View File

@ -87,7 +87,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" --no_cmdline_in_header " + " --no_cmdline_in_header " +
" -o %s", " -o %s",
1, 1,
Arrays.asList("f9f6418698f967ba7ca451ac1fb4cc8d") Arrays.asList("94057d7a98c1af0a7490540ea1d9b247")
); );
executeTest("testSimpleVCFStreaming", spec); executeTest("testSimpleVCFStreaming", spec);

View File

@ -626,7 +626,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
private Set<String> IDsToRemove = null; private Set<String> IDsToRemove = null;
private Map<String, VCFHeader> vcfRods; private Map<String, VCFHeader> vcfRods;
private final List<Allele> diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private final List<Allele> diploidNoCallAlleles = GATKVariantContextUtils.noCallAlleles(2);
private final Map<Integer, Integer> ploidyToNumberOfAlleles = new HashMap<Integer, Integer>();
/** /**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
@ -844,12 +845,24 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
continue; continue;
} }
if (considerNoCallGenotypes()) { if (considerNoCallGenotypes()) {
final int numNoCallSamples = numNoCallGenotypes(vc); final int numNoCallSamples = numNoCallGenotypes(vc);
final double fractionNoCallGenotypes = samples.isEmpty() ? 0.0 : ((double) numNoCallSamples) / samples.size(); final double fractionNoCallGenotypes = samples.isEmpty() ? 0.0 : ((double) numNoCallSamples) / samples.size();
if (numNoCallSamples > maxNOCALLnumber || fractionNoCallGenotypes > maxNOCALLfraction) if (numNoCallSamples > maxNOCALLnumber || fractionNoCallGenotypes > maxNOCALLfraction)
continue; continue;
} }
// Initialize cache of PL index to a list of alleles for any ploidy
if (vc.getType() != VariantContext.Type.NO_VARIATION) {
for (final Genotype g : vc.getGenotypes()) {
if (g.getPloidy() != 0) {
if (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles()) {
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy());
ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles());
}
}
}
}
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates); VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
@ -1089,7 +1102,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
for ( Genotype genotype : newGC ) { for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction. //Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){ if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
genotypes.add(new GenotypeBuilder(genotype).alleles(diploidNoCallAlleles).noGQ().make()); final List<Allele> noCallAlleles = (genotype.getPloidy() == 2 ? diploidNoCallAlleles :
GATKVariantContextUtils.noCallAlleles(genotype.getPloidy()));
genotypes.add(new GenotypeBuilder(genotype).alleles(noCallAlleles).noGQ().make());
} }
else{ else{
genotypes.add(genotype); genotypes.add(genotype);
@ -1137,7 +1152,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
for ( final Genotype g : vc.getGenotypes() ) { for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() && g.isFiltered() ) { if ( g.isCalled() && g.isFiltered() ) {
haveFilteredNoCallAlleles = true; haveFilteredNoCallAlleles = true;
genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make()); final List<Allele> noCallAlleles = (g.getPloidy() == 2 ? diploidNoCallAlleles :
GATKVariantContextUtils.noCallAlleles(g.getPloidy()));
genotypes.add(new GenotypeBuilder(g).alleles(noCallAlleles).make());
} }
else { else {
// increment the number called alleles and called alternate alleles // increment the number called alleles and called alternate alleles

View File

@ -109,7 +109,7 @@ public class VCFIntegrationTest extends WalkerTest {
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF; String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("122340e3dc333d2b4b79c5c0c443a3fe")); WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("8e35e142fea8891f72e577e7b1526756"));
executeTest("Test reading and writing samtools vcf", spec1); executeTest("Test reading and writing samtools vcf", spec1);
} }
@ -118,7 +118,7 @@ public class VCFIntegrationTest extends WalkerTest {
String testVCF = privateTestDir + "ex2.vcf"; String testVCF = privateTestDir + "ex2.vcf";
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
String test1 = baseCommand + "-T SelectVariants -V " + testVCF; String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("db565efb14b2fe5f00a11762751d2476")); WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("5245b967ac66b29c8e836bfa5b5b4643"));
executeTest("Test writing samtools WEx BCF example", spec1); executeTest("Test writing samtools WEx BCF example", spec1);
} }
@ -360,7 +360,7 @@ public class VCFIntegrationTest extends WalkerTest {
" -o %s "; " -o %s ";
final String name = "testBlockCompressedInput: " + testSpec.toString(); final String name = "testBlockCompressedInput: " + testSpec.toString();
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("ce9c0bf31ee9452ac4a12a59d5814545")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("1cc0ab636f33b9105d54bdaf2b80ad28"));
executeTest(name, spec); executeTest(name, spec);
} }

View File

@ -48,14 +48,6 @@ public class GATKVariantContextUtils {
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
/**
* Diploid NO_CALL allele list...
*
* @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad.
*/
@Deprecated
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public final static String MERGE_FILTER_PREFIX = "filterIn"; public final static String MERGE_FILTER_PREFIX = "filterIn";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
@ -600,11 +592,11 @@ public class GATKVariantContextUtils {
* @param vc variant context with genotype likelihoods * @param vc variant context with genotype likelihoods
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC *** * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
* @param assignGenotypes assignment strategy for the (subsetted) PLs * @param assignGenotypes assignment strategy for the (subsetted) PLs
* @return a new non-null GenotypesContext * @return a new non-null GenotypesContext with subsetted alleles
*/ */
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, public static GenotypesContext subsetAlleles(final VariantContext vc,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final GenotypeAssignmentMethod assignGenotypes) { final GenotypeAssignmentMethod assignGenotypes) {
if ( vc == null ) throw new IllegalArgumentException("the VariantContext cannot be null"); if ( vc == null ) throw new IllegalArgumentException("the VariantContext cannot be null");
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null"); if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
if ( allelesToUse.isEmpty() ) throw new IllegalArgumentException("must have alleles to use"); if ( allelesToUse.isEmpty() ) throw new IllegalArgumentException("must have alleles to use");
@ -615,7 +607,7 @@ public class GATKVariantContextUtils {
if (vc.getGenotypes().isEmpty()) return GenotypesContext.create(); if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
// find the likelihoods indexes to use from the used alternate alleles // find the likelihoods indexes to use from the used alternate alleles
final List<Integer> likelihoodIndexesToUse = determineDiploidLikelihoodIndexesToUse(vc, allelesToUse); final List<List<Integer>> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse);
// find the strand allele count indexes to use from the used alternate alleles // find the strand allele count indexes to use from the used alternate alleles
final List<Integer> sacIndexesToUse = determineSACIndexesToUse(vc, allelesToUse); final List<Integer> sacIndexesToUse = determineSACIndexesToUse(vc, allelesToUse);
@ -625,26 +617,26 @@ public class GATKVariantContextUtils {
} }
/** /**
* Find the likelihood indexes to use for a selected set of diploid alleles * Find the likelihood indexes to use for a selected set of alleles
* *
* @param originalVC the original VariantContext * @param originalVC the original VariantContext
* @param allelesToUse the subset of alleles to use * @param allelesToUse the subset of alleles to use
* @return a list of PL indexes to use or null if none * @return a list of PL indexes to use or null if none
*/ */
private static List<Integer> determineDiploidLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) { private static List<List<Integer>> determineLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
if ( originalVC == null) throw new IllegalArgumentException("the original VariantContext cannot be null"); if ( originalVC == null) throw new IllegalArgumentException("the original VariantContext cannot be null");
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null"); if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
// the bitset representing the allele indexes we want to keep // the bitset representing the allele indexes we want to keep
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse); final BitSet alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the PLs as is; otherwise, we determine which ones to keep // then we can keep the PLs as is; otherwise, we determine which ones to keep
if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length ) if ( alleleIndexesToUse.cardinality() == alleleIndexesToUse.size() )
return null; return null;
return getDiploidLikelihoodIndexes(originalVC, alleleIndexesToUse); return getLikelihoodIndexes(originalVC, alleleIndexesToUse);
} }
/** /**
@ -654,17 +646,17 @@ public class GATKVariantContextUtils {
* @param allelesToUse the subset of alleles to use * @param allelesToUse the subset of alleles to use
* @return a list of SAC indexes to use or null if none * @return a list of SAC indexes to use or null if none
*/ */
public static List<Integer> determineSACIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) { public static List<Integer> determineSACIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
if ( originalVC == null ) throw new IllegalArgumentException("the original VC cannot be null"); if ( originalVC == null ) throw new IllegalArgumentException("the original VC cannot be null");
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null"); if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
// the bitset representing the allele indexes we want to keep // the bitset representing the allele indexes we want to keep
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse); final BitSet alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the SACs as is; otherwise, we determine which ones to keep // then we can keep the SACs as is; otherwise, we determine which ones to keep
if (MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length) if (alleleIndexesToUse.cardinality() == alleleIndexesToUse.size())
return null; return null;
return getSACIndexes(alleleIndexesToUse); return getSACIndexes(alleleIndexesToUse);
@ -675,32 +667,24 @@ public class GATKVariantContextUtils {
* *
* @param originalVC the original VariantContext * @param originalVC the original VariantContext
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset) * @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
* @return a non-null List * @return likelihoods indexes for each genotype
*/ */
private static List<Integer> getDiploidLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) { private static List<List<Integer>> getLikelihoodIndexes(final VariantContext originalVC, BitSet alleleIndexesToUse) {
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null"); final List<List<Integer>> likelihoodIndexesPerGenotype = new ArrayList<List<Integer>>(10);
if (alleleIndexesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
// All samples must be diploid for (final Genotype g : originalVC.getGenotypes()) {
for ( final Genotype g : originalVC.getGenotypes() ){ final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), g.getPloidy());
if ( g.getPloidy() != DEFAULT_PLOIDY ) final List<Integer> likelihoodIndexes = new ArrayList<>(30);
throw new ReviewedGATKException("All samples must be diploid"); for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
// consider this entry only if all the alleles are good
if ( GenotypeLikelihoods.getAlleles(PLindex, g.getPloidy()).stream().allMatch(i -> alleleIndexesToUse.get(i)) )
likelihoodIndexes.add(PLindex);
}
likelihoodIndexesPerGenotype.add(likelihoodIndexes);
} }
final List<Integer> result = new ArrayList<>(30); return likelihoodIndexesPerGenotype;
// numLikelihoods takes total # of alleles.
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good
if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] )
result.add(PLindex);
}
return result;
} }
/** /**
@ -709,15 +693,15 @@ public class GATKVariantContextUtils {
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset) * @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
* @return a non-null List * @return a non-null List
*/ */
private static List<Integer> getSACIndexes(final boolean[] alleleIndexesToUse) { private static List<Integer> getSACIndexes(final BitSet alleleIndexesToUse) {
if (alleleIndexesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null"); if (alleleIndexesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
if (alleleIndexesToUse.length == 0) throw new IllegalArgumentException("cannot have no alleles to use"); if (alleleIndexesToUse.isEmpty()) throw new IllegalArgumentException("cannot have no alleles to use");
final List<Integer> result = new ArrayList<>(2 * alleleIndexesToUse.length); final List<Integer> result = new ArrayList<>(2 * alleleIndexesToUse.size());
for (int SACindex = 0; SACindex < alleleIndexesToUse.length; SACindex++) { for (int SACindex = 0; SACindex < alleleIndexesToUse.size(); SACindex++) {
if (alleleIndexesToUse[SACindex]) { if (alleleIndexesToUse.get(SACindex)) {
result.add(2 * SACindex); result.add(2 * SACindex);
result.add(2 * SACindex + 1); result.add(2 * SACindex + 1);
} }
@ -734,19 +718,19 @@ public class GATKVariantContextUtils {
* @param allelesToUse the list of alleles to keep * @param allelesToUse the list of alleles to keep
* @return non-null bitset * @return non-null bitset
*/ */
private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToUse) { private static BitSet getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToUse) {
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null"); if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null");
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null"); if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
final int numOriginalAltAlleles = originalVC.getNAlleles() - 1; final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1]; final BitSet alleleIndexesToKeep = new BitSet(numOriginalAltAlleles + 1);
// the reference Allele is definitely still used // the reference Allele is always used
alleleIndexesToKeep[0] = true; alleleIndexesToKeep.set(0);
for (int i = 0; i < numOriginalAltAlleles; i++) { for (int i = 0; i < numOriginalAltAlleles; i++) {
if (allelesToUse.contains(originalVC.getAlternateAllele(i))) if (allelesToUse.contains(originalVC.getAlternateAllele(i)))
alleleIndexesToKeep[i + 1] = true; alleleIndexesToKeep.set(i + 1);
} }
return alleleIndexesToKeep; return alleleIndexesToKeep;
@ -813,7 +797,7 @@ public class GATKVariantContextUtils {
* @param originalGs the original GenotypesContext * @param originalGs the original GenotypesContext
* @param originalVC the original VariantContext * @param originalVC the original VariantContext
* @param allelesToUse the actual alleles to use with the new Genotypes * @param allelesToUse the actual alleles to use with the new Genotypes
* @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineDiploidLikelihoodIndexesToUse()) * @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse for each genotype (@see #determineLikelihoodIndexesToUse())
* @param sacIndexesToUse the indexes in the SAC to use given the allelesToUse (@see #determineSACIndexesToUse()) * @param sacIndexesToUse the indexes in the SAC to use given the allelesToUse (@see #determineSACIndexesToUse())
* @param assignGenotypes assignment strategy for the (subsetted) PLs * @param assignGenotypes assignment strategy for the (subsetted) PLs
* @return a new non-null GenotypesContext * @return a new non-null GenotypesContext
@ -821,7 +805,7 @@ public class GATKVariantContextUtils {
private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs, private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
final VariantContext originalVC, final VariantContext originalVC,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final List<Integer> likelihoodIndexesToUse, final List<List<Integer>> likelihoodIndexesToUse,
final List<Integer> sacIndexesToUse, final List<Integer> sacIndexesToUse,
final GenotypeAssignmentMethod assignGenotypes) { final GenotypeAssignmentMethod assignGenotypes) {
@ -832,9 +816,6 @@ public class GATKVariantContextUtils {
// the new genotypes to create // the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size()); final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
// make sure we are seeing the expected number of likelihoods per sample
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), 2);
// the samples // the samples
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName(); final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
@ -850,6 +831,8 @@ public class GATKVariantContextUtils {
newLikelihoods = null; newLikelihoods = null;
gb.noPL(); gb.noPL();
} else { } else {
// make sure we are seeing the expected number of likelihoods per sample
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), g.getPloidy());
final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
if ( likelihoodIndexesToUse == null ) { if ( likelihoodIndexesToUse == null ) {
newLikelihoods = originalLikelihoods; newLikelihoods = originalLikelihoods;
@ -857,19 +840,20 @@ public class GATKVariantContextUtils {
logger.debug("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + originalVC + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods); logger.debug("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + originalVC + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods);
newLikelihoods = null; newLikelihoods = null;
} else { } else {
newLikelihoods = new double[likelihoodIndexesToUse.size()]; newLikelihoods = new double[likelihoodIndexesToUse.get(k).size()];
int newIndex = 0; int newIndex = 0;
for ( final int oldIndex : likelihoodIndexesToUse ) for ( final int oldIndex : likelihoodIndexesToUse.get(k) )
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
// might need to re-normalize // might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
} }
if ( newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0 && likelihoodsAreUninformative(newLikelihoods) )) if ( newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0 && likelihoodsAreUninformative(newLikelihoods) )) {
gb.noPL(); gb.noPL();
else } else {
gb.PL(newLikelihoods); gb.PL(newLikelihoods);
}
} }
// create the new strand allele counts array from the used alleles // create the new strand allele counts array from the used alleles
@ -878,7 +862,7 @@ public class GATKVariantContextUtils {
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs); gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
} }
updateGenotypeAfterSubsetting(g.getAlleles(), gb, assignGenotypes, newLikelihoods, allelesToUse); updateGenotypeAfterSubsetting(g.getAlleles(), g.getPloidy(), gb, assignGenotypes, newLikelihoods, allelesToUse);
newGTs.add(gb.make()); newGTs.add(gb.make());
} }
@ -893,12 +877,14 @@ public class GATKVariantContextUtils {
* Add the genotype call (GT) field to GenotypeBuilder using the requested algorithm assignmentMethod * Add the genotype call (GT) field to GenotypeBuilder using the requested algorithm assignmentMethod
* *
* @param originalGT the original genotype calls, cannot be null * @param originalGT the original genotype calls, cannot be null
* @param ploidy the number of sets of chromosomes
* @param gb the builder where we should put our newly called alleles, cannot be null * @param gb the builder where we should put our newly called alleles, cannot be null
* @param assignmentMethod the method to use to do the assignment, cannot be null * @param assignmentMethod the method to use to do the assignment, cannot be null
* @param newLikelihoods a vector of likelihoods to use if the method requires PLs, should be log10 likelihoods, cannot be null * @param newLikelihoods a vector of likelihoods to use if the method requires PLs, should be log10 likelihoods, cannot be null
* @param allelesToUse the alleles we are using for our subsetting * @param allelesToUse the alleles we are using for our subsetting
*/ */
public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT, public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
final int ploidy,
final GenotypeBuilder gb, final GenotypeBuilder gb,
final GenotypeAssignmentMethod assignmentMethod, final GenotypeAssignmentMethod assignmentMethod,
final double[] newLikelihoods, final double[] newLikelihoods,
@ -911,11 +897,11 @@ public class GATKVariantContextUtils {
case DO_NOT_ASSIGN_GENOTYPES: case DO_NOT_ASSIGN_GENOTYPES:
break; break;
case SET_TO_NO_CALL: case SET_TO_NO_CALL:
gb.alleles(NO_CALL_ALLELES); gb.alleles(noCallAlleles(ploidy));
gb.noGQ(); gb.noGQ();
break; break;
case SET_TO_NO_CALL_NO_ANNOTATIONS: case SET_TO_NO_CALL_NO_ANNOTATIONS:
gb.alleles(NO_CALL_ALLELES); gb.alleles(noCallAlleles(ploidy));
gb.noGQ(); gb.noGQ();
gb.noAD(); gb.noAD();
gb.noPL(); gb.noPL();
@ -924,13 +910,16 @@ public class GATKVariantContextUtils {
case USE_PLS_TO_ASSIGN: case USE_PLS_TO_ASSIGN:
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) { if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
// if there is no mass on the (new) likelihoods, then just no-call the sample // if there is no mass on the (new) likelihoods, then just no-call the sample
gb.alleles(NO_CALL_ALLELES); gb.alleles(noCallAlleles(ploidy));
gb.noGQ(); gb.noGQ();
} else { } else {
// find the genotype with maximum likelihoods // find the genotype with maximum likelihoods
final int PLindex = MathUtils.maxElementIndex(newLikelihoods); final int PLindex = MathUtils.maxElementIndex(newLikelihoods);
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); final List<Allele> alleles = new ArrayList<>();
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); for ( final Integer alleleIndex : GenotypeLikelihoods.getAlleles(PLindex, ploidy)) {
alleles.add(allelesToUse.get(alleleIndex) );
}
gb.alleles(alleles);
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
} }
break; break;
@ -973,7 +962,7 @@ public class GATKVariantContextUtils {
// create the new genotypes // create the new genotypes
for ( final Genotype g : vc.getGenotypes() ) { for ( final Genotype g : vc.getGenotypes() ) {
final int gPloidy = g.getPloidy() == 0 ? ploidy : g.getPloidy(); final int gPloidy = g.getPloidy() == 0 ? ploidy : g.getPloidy();
final List<Allele> refAlleles = gPloidy == 2 ? diploidRefAlleles : Collections.nCopies(gPloidy, ref); final List<Allele> refAlleles = Collections.nCopies(gPloidy, vc.getReference());
final GenotypeBuilder gb = new GenotypeBuilder(g.getSampleName(), refAlleles); final GenotypeBuilder gb = new GenotypeBuilder(g.getSampleName(), refAlleles);
if ( g.hasDP() ) gb.DP(g.getDP()); if ( g.hasDP() ) gb.DP(g.getDP());
if ( g.hasGQ() ) gb.GQ(g.getGQ()); if ( g.hasGQ() ) gb.GQ(g.getGQ());
@ -990,7 +979,7 @@ public class GATKVariantContextUtils {
* @return genotypes context * @return genotypes context
*/ */
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) { public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
return subsetDiploidAlleles(vc, vc.getAlleles(), GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN); return subsetAlleles(vc, vc.getAlleles(), GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
} }
/** /**
@ -1079,7 +1068,7 @@ public class GATKVariantContextUtils {
genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL ) genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL )
addInfoFiledAnnotations(vc, builder, alt, keepOriginalChrCounts); addInfoFiledAnnotations(vc, builder, alt, keepOriginalChrCounts);
builder.genotypes(subsetDiploidAlleles(vc, alleles, genotypeAssignmentMethodUsed)); builder.genotypes(subsetAlleles(vc, alleles, genotypeAssignmentMethodUsed));
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true); final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
biallelics.add(trimmed); biallelics.add(trimmed);
} }
@ -1400,7 +1389,7 @@ public class GATKVariantContextUtils {
if ( numNewAlleles == numOriginalAlleles ) if ( numNewAlleles == numOriginalAlleles )
return oldGs; return oldGs;
return fixDiploidGenotypesFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles()); return fixGenotypesFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
} }
/** /**
@ -1411,14 +1400,14 @@ public class GATKVariantContextUtils {
* @param allelesToUse the new subset of alleles to use * @param allelesToUse the new subset of alleles to use
* @return a new non-null GenotypesContext * @return a new non-null GenotypesContext
*/ */
static private GenotypesContext fixDiploidGenotypesFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) { static private GenotypesContext fixGenotypesFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
if ( originalGs == null ) throw new IllegalArgumentException("the selected GenotypesContext cannot be null"); if ( originalGs == null ) throw new IllegalArgumentException("the selected GenotypesContext cannot be null");
if ( originalVC == null ) throw new IllegalArgumentException("the original VariantContext cannot be null"); if ( originalVC == null ) throw new IllegalArgumentException("the original VariantContext cannot be null");
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null"); if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
// find the likelihoods indexes to use from the used alternate alleles // find the likelihoods indexes to use from the used alternate alleles
final List<Integer> likelihoodIndexesToUse = determineDiploidLikelihoodIndexesToUse(originalVC, allelesToUse); final List<List<Integer>> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse);
// find the strand allele count indexes to use from the used alternate alleles // find the strand allele count indexes to use from the used alternate alleles
final List<Integer> sacIndexesToUse = determineSACIndexesToUse(originalVC, allelesToUse); final List<Integer> sacIndexesToUse = determineSACIndexesToUse(originalVC, allelesToUse);
@ -1441,7 +1430,7 @@ public class GATKVariantContextUtils {
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use list cannot be null"); if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use list cannot be null");
// the bitset representing the allele indexes we want to keep // the bitset representing the allele indexes we want to keep
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse); final BitSet alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// the new genotypes to create // the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size()); final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
@ -1452,7 +1441,7 @@ public class GATKVariantContextUtils {
// create the new genotypes // create the new genotypes
for ( int k = 0; k < originalGs.size(); k++ ) { for ( int k = 0; k < originalGs.size(); k++ ) {
final Genotype g = originalGs.get(sampleIndices.get(k)); final Genotype g = originalGs.get(sampleIndices.get(k));
newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size())); newGTs.add(fixAD(g, alleleIndexesToUse));
} }
return newGTs; return newGTs;
@ -1463,10 +1452,9 @@ public class GATKVariantContextUtils {
* *
* @param genotype the original Genotype * @param genotype the original Genotype
* @param alleleIndexesToUse a bitset describing whether or not to keep a given index * @param alleleIndexesToUse a bitset describing whether or not to keep a given index
* @param nAllelesToUse how many alleles we are keeping
* @return a non-null Genotype * @return a non-null Genotype
*/ */
private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) { private static Genotype fixAD(final Genotype genotype, final BitSet alleleIndexesToUse) {
// if it ain't broke don't fix it // if it ain't broke don't fix it
if ( !genotype.hasAD() ) if ( !genotype.hasAD() )
return genotype; return genotype;
@ -1474,18 +1462,14 @@ public class GATKVariantContextUtils {
final GenotypeBuilder builder = new GenotypeBuilder(genotype); final GenotypeBuilder builder = new GenotypeBuilder(genotype);
final int[] oldAD = genotype.getAD(); final int[] oldAD = genotype.getAD();
if ( oldAD.length != alleleIndexesToUse.length ) { final int[] newAD = new int[alleleIndexesToUse.cardinality()];
builder.noAD();
} else { int currentIndex = 0;
final int[] newAD = new int[nAllelesToUse]; for ( int i = alleleIndexesToUse.nextSetBit(0); i >= 0; i = alleleIndexesToUse.nextSetBit(i+1) ) {
int currentIndex = 0; newAD[currentIndex++] = oldAD[i];
for ( int i = 0; i < oldAD.length; i++ ) {
if ( alleleIndexesToUse[i] )
newAD[currentIndex++] = oldAD[i];
}
builder.AD(newAD);
} }
return builder.make();
return builder.AD(newAD).make();
} }
private static Allele determineReferenceAllele(final List<VariantContext> VCs) { private static Allele determineReferenceAllele(final List<VariantContext> VCs) {
@ -2247,11 +2231,11 @@ public class GATKVariantContextUtils {
final Object[] splitOriginalField = getVAttributeValues(vc.getAttribute(infoFieldName)); final Object[] splitOriginalField = getVAttributeValues(vc.getAttribute(infoFieldName));
// subset the field // subset the field
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(vc, Arrays.asList(altAllele)); final BitSet alleleIndexesToUse = getAlleleIndexBitset(vc, Arrays.asList(altAllele));
// skip the first allele, which is the reference // skip the first allele, which is the reference
for (int i = 1; i < alleleIndexesToUse.length; i++) { for (int i = 1; i < alleleIndexesToUse.size(); i++) {
if (alleleIndexesToUse[i] == true) if (alleleIndexesToUse.get(i))
return splitOriginalField[i-1]; return splitOriginalField[i-1];
} }

View File

@ -554,8 +554,8 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
// final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2)); // final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2));
// //
// final VariantContext merged = VariantContextUtils.simpleMerge( // final VariantContext merged = VariantContextUtils.simpleMerge(
// Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, // Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
// VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false, false); // GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
// } // }
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
@ -835,7 +835,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
@Test(enabled = !DEBUG) @Test(enabled = !DEBUG)
public void testRepeatAllele() { public void testRepeatAllele() {
Allele nullR = Allele.create("A", true); Allele nullR = Aref;
Allele nullA = Allele.create("A", false); Allele nullA = Allele.create("A", false);
Allele atc = Allele.create("AATC", false); Allele atc = Allele.create("AATC", false);
Allele atcatc = Allele.create("AATCATC", false); Allele atcatc = Allele.create("AATCATC", false);
@ -1126,24 +1126,25 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
// //
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
@DataProvider(name = "subsetDiploidAllelesData") @DataProvider(name = "SubsetAllelesData")
public Object[][] makesubsetDiploidAllelesData() { public Object[][] makesubsetAllelesData() {
List<Object[]> tests = new ArrayList<>(); List<Object[]> tests = new ArrayList<>();
final Allele A = Allele.create("A", true); final List<Allele> AA = Arrays.asList(Aref,Aref);
final Allele C = Allele.create("C"); final List<Allele> AC = Arrays.asList(Aref,C);
final Allele G = Allele.create("G");
final List<Allele> AA = Arrays.asList(A,A);
final List<Allele> AC = Arrays.asList(A,C);
final List<Allele> CC = Arrays.asList(C,C); final List<Allele> CC = Arrays.asList(C,C);
final List<Allele> AG = Arrays.asList(A,G); final List<Allele> AG = Arrays.asList(Aref,G);
final List<Allele> CG = Arrays.asList(C,G);
final List<Allele> GG = Arrays.asList(G,G); final List<Allele> GG = Arrays.asList(G,G);
final List<Allele> ACG = Arrays.asList(A,C,G); final List<Allele> ACG = Arrays.asList(Aref,C,G);
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make(); final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
// haploid, one alt allele
final double[] haploidRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.1});
final double[] haploidAltPL = MathUtils.normalizeFromRealSpace(new double[]{0.1, 0.9});
final double[] haploidUninformative = new double[]{0, 0};
// diploid, one alt allele
final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01}); final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01}); final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9}); final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
@ -1151,37 +1152,47 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(50).make(); final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(50).make();
// make sure we don't screw up the simple case // the simple case where no selection occurs
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make(); final Genotype aHaploidGT = new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(new int[]{10,2}).PL(haploidRefPL).GQ(8).make();
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10, 2}).PL(hetPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make(); final Genotype cHaploidGT = new GenotypeBuilder(base).alleles(Arrays.asList(C)).AD(new int[]{10,2}).PL(haploidAltPL).GQ(8).make();
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10, 2}).PL(homVarPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make(); final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make();
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make();
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make();
// haploid
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aHaploidGT).make(), AC, Arrays.asList(new GenotypeBuilder(aHaploidGT).make())});
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(cHaploidGT).make(), AC, Arrays.asList(new GenotypeBuilder(cHaploidGT).make())});
// diploid
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), AC, Arrays.asList(new GenotypeBuilder(aaGT).make())}); tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), AC, Arrays.asList(new GenotypeBuilder(aaGT).make())});
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), AC, Arrays.asList(new GenotypeBuilder(acGT).make())}); tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), AC, Arrays.asList(new GenotypeBuilder(acGT).make())});
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), AC, Arrays.asList(new GenotypeBuilder(ccGT).make())}); tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), AC, Arrays.asList(new GenotypeBuilder(ccGT).make())});
// uninformative test case // uninformative test cases
// diploid
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).PL(uninformative).GQ(0).make(); final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).PL(uninformative).GQ(0).make();
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noPL().noGQ().make(); final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.noCallAlleles(2)).noPL().noGQ().make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), AC, Arrays.asList(emptyGT)}); tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), AC, Arrays.asList(emptyGT)});
// haploid
final Genotype haploidUninformativeGT = new GenotypeBuilder(base).alleles(Arrays.asList(C)).PL(haploidUninformative).GQ(0).make();
final Genotype haplpoidEmptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.noCallAlleles(1)).noPL().noGQ().make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(haploidUninformativeGT).make(), AC, Arrays.asList(haplpoidEmptyGT)});
// actually subsetting down from multiple alt values // subsetting from 3 to 2 alleles
// diploid PL order is: AA, AC, CC, AG, CG, GG (00, 01, 11, 02, 12, 22)
final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50}; final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50}; final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50}; final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50}; final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50};
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0};
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homRef3AllelesPL).make()).make(), new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homRef3AllelesPL).make()).make(),
AC, AC,
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).GQ(100).make())}); Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).GQ(100).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefC3AllelesPL).make()).make(), new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefC3AllelesPL).make()).make(),
AC, AC,
Arrays.asList(new GenotypeBuilder(base).alleles(AC).PL(new double[]{-10, 0, -20}).GQ(100).make())}); Arrays.asList(new GenotypeBuilder(base).alleles(AC).PL(new double[]{-10, 0, -20}).GQ(100).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homC3AllelesPL).make()).make(), new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homC3AllelesPL).make()).make(),
AC, AC,
@ -1190,7 +1201,6 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefG3AllelesPL).make()).make(), new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefG3AllelesPL).make()).make(),
AG, AG,
Arrays.asList(new GenotypeBuilder(base).alleles(AG).PL(new double[]{-20, 0, -50}).GQ(200).make())}); Arrays.asList(new GenotypeBuilder(base).alleles(AG).PL(new double[]{-20, 0, -50}).GQ(200).make())});
// wow, scary -- bad output but discussed with Eric and we think this is the only thing that can be done // wow, scary -- bad output but discussed with Eric and we think this is the only thing that can be done
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetCG3AllelesPL).make()).make(), new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetCG3AllelesPL).make()).make(),
@ -1202,14 +1212,36 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
AG, AG,
Arrays.asList(new GenotypeBuilder(base).alleles(GG).PL(new double[]{-20, -40, 0}).GQ(200).make())}); Arrays.asList(new GenotypeBuilder(base).alleles(GG).PL(new double[]{-20, -40, 0}).GQ(200).make())});
// haploid PL order is: A, C, G (0, 1, 2)
final double[] haploidRef3AllelesPL = new double[]{0, -10, -20};
final double[] haploidAltC3AllelesPL = new double[]{-10, 0, -20};
final double[] haploidAltG3AllelesPL = new double[]{-20, -10, 0};
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(haploidRef3AllelesPL).make()).make(),
AC,
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{0, -10}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(C)).PL(haploidAltC3AllelesPL).make()).make(),
AC,
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(C)).PL(new double[]{-10, 0}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(G)).PL(haploidAltG3AllelesPL).make()).make(),
AG,
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(G)).PL(new double[]{-20, 0}).GQ(200).make())});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@Test(dataProvider = "subsetDiploidAllelesData") @Test(dataProvider = "SubsetAllelesData")
public void testsubsetDiploidAllelesData(final VariantContext inputVC, public void testSubsetAlleles(final VariantContext inputVC,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final List<Genotype> expectedGenotypes) { final List<Genotype> expectedGenotypes) {
final GenotypesContext actual = GATKVariantContextUtils.subsetDiploidAlleles(inputVC, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN); // initialize cache of allele anyploid indices
for (final Genotype genotype : inputVC.getGenotypes()) {
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(inputVC.getNAlleles() - 1, genotype.getPloidy());
}
final GenotypesContext actual = GATKVariantContextUtils.subsetAlleles(inputVC, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
Assert.assertEquals(actual.size(), expectedGenotypes.size()); Assert.assertEquals(actual.size(), expectedGenotypes.size());
for ( final Genotype expected : expectedGenotypes ) { for ( final Genotype expected : expectedGenotypes ) {
@ -1221,71 +1253,134 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
@DataProvider(name = "UpdateGenotypeAfterSubsettingData") @DataProvider(name = "UpdateGenotypeAfterSubsettingData")
public Object[][] makeUpdateGenotypeAfterSubsettingData() { public Object[][] makeUpdateGenotypeAfterSubsettingData() {
List<Object[]> tests = new ArrayList<Object[]>(); final List<Object[]> tests = new ArrayList<>();
final Allele A = Allele.create("A", true); final List<Allele> AA = Arrays.asList(Aref,Aref);
final Allele C = Allele.create("C"); final List<Allele> AC = Arrays.asList(Aref,C);
final Allele G = Allele.create("G");
final List<Allele> AA = Arrays.asList(A,A);
final List<Allele> AC = Arrays.asList(A,C);
final List<Allele> CC = Arrays.asList(C,C); final List<Allele> CC = Arrays.asList(C,C);
final List<Allele> AG = Arrays.asList(A,G); final List<Allele> AG = Arrays.asList(Aref,G);
final List<Allele> CG = Arrays.asList(C,G); final List<Allele> CG = Arrays.asList(C,G);
final List<Allele> GG = Arrays.asList(G,G); final List<Allele> GG = Arrays.asList(G,G);
final List<Allele> ACG = Arrays.asList(A,C,G); final List<Allele> AAA = Arrays.asList(Aref,Aref,Aref);
final List<List<Allele>> allSubsetAlleles = Arrays.asList(AC,AG,ACG); final List<Allele> AAC = Arrays.asList(Aref,Aref,C);
final List<Allele> ACC = Arrays.asList(Aref,C,C);
final List<Allele> CCC = Arrays.asList(C,C,C);
final List<Allele> AAG = Arrays.asList(Aref,Aref,G);
final List<Allele> ACG = Arrays.asList(Aref,C,G);
final List<Allele> CCG = Arrays.asList(C,C,G);
final List<Allele> AGG = Arrays.asList(Aref,G,G);
final List<Allele> CGG = Arrays.asList(C,G,G);
final List<Allele> GGG = Arrays.asList(G,G,G);
final List<List<Allele>> allDiploidSubsetAlleles = Arrays.asList(AC,AG,ACG);
final List<List<Allele>> allTriploidSubsetAlleles = Arrays.asList(AAA,AAC,ACC,CCC,AAG,ACG,CCG,AGG,CGG,GGG);
// for P=1, the index of the genotype a is a
final double[] aRefPL = new double[]{0.9, 0.09, 0.01};
final double[] cPL = new double[]{0.09, 0.9, 0.01};
final double[] gPL = new double[]{0.01, 0.09, 0.9};
final List<double[]> allHaploidPLs = Arrays.asList(aRefPL, cPL, gPL);
final List<List<Allele>> allHaploidSubsetAlleles = Arrays.asList(Arrays.asList(Aref), Arrays.asList(G));
// for P=2 and N=1, the ordering is 00,01,11
final double[] homRefPL = new double[]{0.9, 0.09, 0.01}; final double[] homRefPL = new double[]{0.9, 0.09, 0.01};
final double[] hetPL = new double[]{0.09, 0.9, 0.01}; final double[] hetPL = new double[]{0.09, 0.9, 0.01};
final double[] homVarPL = new double[]{0.01, 0.09, 0.9}; final double[] homVarPL = new double[]{0.01, 0.09, 0.9};
final double[] uninformative = new double[]{0.33, 0.33, 0.33}; final double[] uninformative = new double[]{0.33, 0.33, 0.33};
final List<double[]> allPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformative); final List<double[]> allDiploidPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformative);
for ( final List<Allele> alleles : allSubsetAlleles ) { // for P=3 and N=2, the ordering is 000, 001, 011, 111, 002, 012, 112, 022, 122, 222
for ( final double[] pls : allPLs ) { final double[] aaaPL = new double[]{0.9, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, pls, AA, alleles, GATKVariantContextUtils.NO_CALL_ALLELES}); final double[] aacPL = new double[]{0.01, 0.9, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
} final double[] accPL = new double[]{0.01, 0.02, 0.9, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
final double[] cccPL = new double[]{0.01, 0.02, 0.03, 0.9, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
final double[] aagPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.9, 0.05, 0.06, 0.07, 0.08, 0.09};
final double[] acgPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.9, 0.06, 0.07, 0.08, 0.09};
final double[] ccgPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.9, 0.07, 0.08, 0.09};
final double[] aggPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.9, 0.08, 0.09};
final double[] cggPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.9, 0.09};
final double[] gggPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.9};
final double[] uninformativeTriploid = new double[]{0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
final List<double[]> allTriploidPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformativeTriploid);
for ( final List<Allele> alleles : allHaploidSubsetAlleles ) {
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, allHaploidPLs.get(0), Arrays.asList(Aref), alleles, GATKVariantContextUtils.noCallAlleles(1)});
} }
for ( final List<Allele> alleles : allDiploidSubsetAlleles ) {
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, allDiploidPLs.get(0), AA, alleles, GATKVariantContextUtils.noCallAlleles(2)});
}
for ( final List<Allele> alleles : allTriploidSubsetAlleles ) {
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, allTriploidPLs.get(0), AAA, alleles, GATKVariantContextUtils.noCallAlleles(3)});
}
final List<Allele> originalHaploidGT = Arrays.asList(Aref, C, G );
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aRefPL, originalHaploidGT, originalHaploidGT, Arrays.asList(Aref)});
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, cPL, originalHaploidGT, originalHaploidGT, Arrays.asList(C)});
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, gPL, originalHaploidGT, originalHaploidGT, Arrays.asList(G)});
for ( final List<Allele> originalGT : Arrays.asList(AA, AC, CC, AG, CG, GG) ) { for ( final List<Allele> originalGT : Arrays.asList(AA, AC, CC, AG, CG, GG) ) {
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homRefPL, originalGT, AC, AA}); tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homRefPL, originalGT, AC, AA});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, hetPL, originalGT, AC, AC}); tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, hetPL, originalGT, AC, AC});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homVarPL, originalGT, AC, CC}); tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homVarPL, originalGT, AC, CC});
// tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, uninformative, AA, AC, GATKVariantContextUtils.NO_CALL_ALLELES});
} }
for ( final double[] pls : allPLs ) { for ( final List<Allele> originalGT : allTriploidSubsetAlleles) {
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, AC, AA}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aaaPL, originalGT, ACG, AAA});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, AC, AC}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aacPL, originalGT, ACG, AAC});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, AC, CC}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, accPL, originalGT, ACG, ACC});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, AC, AC}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, cccPL, originalGT, ACG, CCC});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aagPL, originalGT, ACG, AAG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, AG, AA}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, acgPL, originalGT, ACG, ACG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, AG, AA}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, ccgPL, originalGT, ACG, CCG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, AG, AA}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aggPL, originalGT, ACG, AGG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, AG, AG}); tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, cggPL, originalGT, ACG, CCG});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, gggPL, originalGT, ACG, GGG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, ACG, AA});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, ACG, AC});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, ACG, CC});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AG, ACG, AG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, ACG, CG});
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, GG, ACG, GG});
} }
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allHaploidPLs.get(0), Arrays.asList(Aref, C, G), Arrays.asList(Aref), Arrays.asList(Aref)});
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allHaploidPLs.get(0), Arrays.asList(Aref, C, G), Arrays.asList(C), Arrays.asList(C)});
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allHaploidPLs.get(0), Arrays.asList(Aref, C, G), Arrays.asList(G), Arrays.asList(G)});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AA, AC, AA});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AC, AC, AC});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CC, AC, CC});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CG, AC, AC});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AA, AG, AA});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AC, AG, AA});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CC, AG, AA});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CG, AG, AG});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AA, ACG, AA});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AC, ACG, AC});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CC, ACG, CC});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AG, ACG, AG});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CG, ACG, CG});
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), GG, ACG, GG});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), AAA, AAC, AAA});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), ACC, AAC, ACC});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), AAC, AAC, AAC});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), AAC, ACG, AAC});
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), GGG, AAA, AAA});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@Test(enabled = !DEBUG, dataProvider = "UpdateGenotypeAfterSubsettingData") @Test(enabled = !DEBUG, dataProvider = "UpdateGenotypeAfterSubsettingData")
public void testUpdateGenotypeAfterSubsetting(final GATKVariantContextUtils.GenotypeAssignmentMethod mode, public void testUpdateGenotypeAfterSubsetting(final int ploidy,
final GATKVariantContextUtils.GenotypeAssignmentMethod mode,
final double[] likelihoods, final double[] likelihoods,
final List<Allele> originalGT, final List<Allele> originalGT,
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final List<Allele> expectedAlleles) { final List<Allele> expectedAlleles) {
final int numAltAlleles = originalGT.size() > 1 ? originalGT.size() - 1 : 1;
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(numAltAlleles, ploidy);
final GenotypeBuilder gb = new GenotypeBuilder("test"); final GenotypeBuilder gb = new GenotypeBuilder("test");
final double[] log10Likelhoods = MathUtils.normalizeFromLog10(likelihoods, true, false); final double[] log10Likelhoods = MathUtils.normalizeFromLog10(likelihoods, true, false);
GATKVariantContextUtils.updateGenotypeAfterSubsetting(originalGT, gb, mode, log10Likelhoods, allelesToUse); GATKVariantContextUtils.updateGenotypeAfterSubsetting(originalGT, ploidy, gb, mode, log10Likelhoods, allelesToUse);
final Genotype g = gb.make(); final Genotype g = gb.make();
Assert.assertEquals(new HashSet<>(g.getAlleles()), new HashSet<>(expectedAlleles)); Assert.assertEquals(new HashSet<>(g.getAlleles()), new HashSet<>(expectedAlleles));
} }
@ -1331,17 +1426,11 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
public Object[][] makeUpdatePLsSACsAndADData() { public Object[][] makeUpdatePLsSACsAndADData() {
List<Object[]> tests = new ArrayList<>(); List<Object[]> tests = new ArrayList<>();
final Allele A = Allele.create("A", true); final List<Allele> AA = Arrays.asList(Aref,Aref);
final Allele C = Allele.create("C"); final List<Allele> AC = Arrays.asList(Aref,C);
final Allele G = Allele.create("G");
final List<Allele> AA = Arrays.asList(A,A);
final List<Allele> AC = Arrays.asList(A,C);
final List<Allele> CC = Arrays.asList(C,C); final List<Allele> CC = Arrays.asList(C,C);
final List<Allele> AG = Arrays.asList(A,G); final List<Allele> AG = Arrays.asList(Aref,G);
final List<Allele> CG = Arrays.asList(C,G); final List<Allele> ACG = Arrays.asList(Aref,C,G);
final List<Allele> GG = Arrays.asList(G,G);
final List<Allele> ACG = Arrays.asList(A,C,G);
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make(); final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
@ -1352,7 +1441,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make(); final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
// make sure we don't screw up the simple case where no selection happens // make sure we don't screw up the simple case where no selection occurs
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make(); final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10, 2}).PL(hetPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make(); final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10, 2}).PL(hetPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10, 2}).PL(homVarPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make(); final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10, 2}).PL(homVarPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
@ -1364,7 +1453,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
// uninformative test cases // uninformative test cases
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make(); final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)}); tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make(); final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.noCallAlleles(2)).noAD().noPL().noGQ().make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)}); tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
// actually subsetting down from multiple alt values // actually subsetting down from multiple alt values
@ -1375,6 +1464,14 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
final double[] haploidRef3AllelesPL = new double[]{0, -10, -20};
final double[] haploidAltC3AllelesPL = new double[]{-10, 0, -20};
final double[] haploidAltG3AllelesPL = new double[]{-20, -10, 0};
// for P=3 and N=2, the ordering is 000, 001, 011, 111, 002, 012, 112, 022, 122, 222
final double[] triploidRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50, -60, -70, -80, -90};
final double[] tripoidAltC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50, -60, -70, -80, -90};
final int[] homRef3AllelesAD = new int[]{20, 0, 1}; final int[] homRef3AllelesAD = new int[]{20, 0, 1};
final int[] hetRefC3AllelesAD = new int[]{10, 10, 1}; final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
final int[] homC3AllelesAD = new int[]{0, 20, 1}; final int[] homC3AllelesAD = new int[]{0, 20, 1};
@ -1389,6 +1486,44 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
final int[] hetCG3AllelesSAC = new int[]{0, 0, 12, 12, 11, 11}; // AA, AC, CC, AG, CG, GG final int[] hetCG3AllelesSAC = new int[]{0, 0, 12, 12, 11, 11}; // AA, AC, CC, AG, CG, GG
final int[] homG3AllelesSAC = new int[]{0, 0, 1, 1, 21, 21}; // AA, AC, CC, AG, CG, GG final int[] homG3AllelesSAC = new int[]{0, 0, 1, 1, 21, 21}; // AA, AC, CC, AG, CG, GG
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(homRef3AllelesAD).PL(haploidRef3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{0, -10}).AD(new int[]{20, 0}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{20, 19, 0, 1}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(hetRefC3AllelesAD).PL(haploidAltC3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetRefC3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{-10, 0}).AD(new int[]{10, 10}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{10, 9, 10, 9}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(homC3AllelesAD).PL(haploidAltG3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{-20, 0}).AD(new int[]{0, 1}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, Aref)).AD(homRef3AllelesAD).PL(triploidRef3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, Aref)).PL(new double[]{0, -10, -20, -30}).AD(new int[]{20, 0}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{20, 19, 0, 1}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, C)).AD(hetRefC3AllelesAD).PL(tripoidAltC3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetRefC3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, C)).PL(new double[]{-10, 0, -20, -30}).AD(new int[]{10, 10}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{10, 9, 10, 9}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, G)).AD(homRef3AllelesAD).PL(triploidRef3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, G)).PL(new double[]{0, -40, -70, -90}).AD(new int[]{20, 1}).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(100).make())});
tests.add(new Object[]{ tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL). new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(), attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
@ -1433,6 +1568,11 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
public void testUpdatePLsAndADData(final VariantContext originalVC, public void testUpdatePLsAndADData(final VariantContext originalVC,
final VariantContext selectedVC, final VariantContext selectedVC,
final List<Genotype> expectedGenotypes) { final List<Genotype> expectedGenotypes) {
// initialize cache of allele anyploid indices
for (final Genotype genotype : originalVC.getGenotypes()) {
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(originalVC.getNAlleles() - 1, genotype.getPloidy());
}
final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make(); final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
final GenotypesContext actual = GATKVariantContextUtils.updatePLsSACsAD(selectedVCwithGTs, originalVC); final GenotypesContext actual = GATKVariantContextUtils.updatePLsSACsAD(selectedVCwithGTs, originalVC);