Change the behavior of SelectVariants for PL/AD when it encounters a record that has lost one or more alternate alleles.

Previously, we would strip out the PLs and AD values since they were no longer accurate.  However, this is not ideal because
then that information is just lost and 1) users complain on the forum and post it as a bug and 2) it gives us problems in both
the current and future (single sample) calling pipelines because we subset samples/alleles all the time and lose info.

Now the PLs and AD get correctly selected down.

While I was in there I also refactored some related code in subsetDiploidAlleles().  There were no real changes there - I just
broke it out into smaller chunks as per our best practices.

Added unit tests and updated integration tests.
Addressed reviews.
This commit is contained in:
Eric Banks 2013-12-02 12:46:51 -05:00
parent b1073fb17b
commit 6bee6a1b53
5 changed files with 336 additions and 51 deletions

View File

@ -316,7 +316,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
1,
Arrays.asList("f14d75892b99547d8e9ba3a03bfb04ea")
Arrays.asList("69862fb97e8e895fe65c7abb14b03cee")
);
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
}

View File

@ -299,7 +299,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
private int maxIndelSize = Integer.MAX_VALUE;
@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
@Argument(doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
@ -657,6 +657,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
*
* @param vc the VariantContext record to subset
* @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
* @return the subsetted VariantContext
*/
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
@ -665,14 +666,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
VariantContextBuilder builder = new VariantContextBuilder(sub);
final VariantContextBuilder builder = new VariantContextBuilder(sub);
GenotypesContext newGC = sub.getGenotypes();
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size();
if ( lostAllelesInSelection )
newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
// if there are fewer alternate alleles now in the selected VC, we need to fix the PL and AD values
GenotypesContext newGC = GATKVariantContextUtils.updatePLsAndAD(sub, vc);
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
if ( vc.getNSamples() != sub.getNSamples() ) {
@ -682,11 +679,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
// Remove a fraction of the genotypes if needed
if ( fractionGenotypes > 0 ){
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
final ArrayList<Genotype> genotypes = new ArrayList<>();
for ( Genotype genotype : newGC ) {
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
final List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
}
else{
@ -698,7 +695,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
builder.genotypes(newGC);
addAnnotations(builder, sub, lostAllelesInSelection);
addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());
return builder.make();
}

View File

@ -976,6 +976,16 @@ public class MathUtils {
return count;
}
public static int countOccurrences(final boolean element, final boolean[] array) {
int count = 0;
for (final boolean b : array) {
if (element == b)
count++;
}
return count;
}
/**
* Returns n random indices drawn with replacement from the range 0..(k-1)

View File

@ -453,7 +453,12 @@ public class GATKVariantContextUtils {
* rather than the undetermined behavior when using the PLs to assign, which could result
* in hom-var or hom-ref for each, depending on the exact PL values.
*/
BEST_MATCH_TO_ORIGINAL
BEST_MATCH_TO_ORIGINAL,
/**
* do not even bother changing the GTs
*/
DO_NOT_ASSIGN_GENOTYPES
}
/**
@ -461,8 +466,8 @@ public class GATKVariantContextUtils {
*
* @param vc variant context with genotype likelihoods
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
* @return genotypes
* @param assignGenotypes assignment strategy for the (subsetted) PLs
* @return a new non-null GenotypesContext
*/
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
@ -470,50 +475,109 @@ public class GATKVariantContextUtils {
if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele");
if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele");
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create();
// optimization: if no input genotypes, just exit
if (oldGTs.isEmpty()) return newGTs;
// samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
final int numNewAltAlleles = allelesToUse.size() - 1;
final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse);
// which PLs should be carried forward?
ArrayList<Integer> likelihoodIndexesToUse = null;
// create the new genotypes
return createGenotypesWithSubsettedLikelihoods(vc.getGenotypes(), vc, allelesToUse, likelihoodIndexesToUse, assignGenotypes);
}
/**
* Figure out which likelihood indexes to use for a selected down set of alleles
*
* @param originalVC the original VariantContext
* @param allelesToUse the subset of alleles to use
* @return a list of PL indexes to use or null if none
*/
private static List<Integer> determineLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
// the bitset representing the allele indexes we want to keep
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the PLs as is; otherwise, we determine which ones to keep
if ( numNewAltAlleles != numOriginalAltAlleles ) {
likelihoodIndexesToUse = new ArrayList<>(30);
if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length )
return null;
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
altAlleleIndexToUse[i] = true;
}
return getLikelihoodIndexes(originalVC, alleleIndexesToUse);
}
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good
if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
likelihoodIndexesToUse.add(PLindex);
}
/**
* Get the actual likelihoods indexes to use given the corresponding allele indexes
*
* @param originalVC the original VariantContext
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
* @return a non-null List
*/
private static List<Integer> getLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) {
final List<Integer> result = new ArrayList<>(30);
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good
if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] )
result.add(PLindex);
}
return result;
}
/**
* Given an original VariantContext and a list of alleles from that VC to keep,
* returns a bitset representing which allele indexes should be kept
*
* @param originalVC the original VC
* @param allelesToKeep the list of alleles to keep
* @return non-null bitset
*/
private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToKeep) {
final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1];
// the reference Allele is definitely still used
alleleIndexesToKeep[0] = true;
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
if ( allelesToKeep.contains(originalVC.getAlternateAllele(i)) )
alleleIndexesToKeep[i+1] = true;
}
return alleleIndexesToKeep;
}
/**
* Create the new GenotypesContext with the subsetted PLs
*
* @param originalGs the original GenotypesContext
* @param vc the original VariantContext
* @param allelesToUse the actual alleles to use with the new Genotypes
* @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineLikelihoodIndexesToUse())
* @param assignGenotypes assignment strategy for the (subsetted) PLs
* @return a new non-null GenotypesContext
*/
private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
final VariantContext vc,
final List<Allele> allelesToUse,
final List<Integer> likelihoodIndexesToUse,
final GenotypeAssignmentMethod assignGenotypes) {
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
// make sure we are seeing the expected number of likelihoods per sample
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
// the samples
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
// create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
for ( int k = 0; k < originalGs.size(); k++ ) {
final Genotype g = originalGs.get(sampleIndices.get(k));
final GenotypeBuilder gb = new GenotypeBuilder(g);
// create the new likelihoods array from the alleles we are allowed to use
@ -570,13 +634,16 @@ public class GATKVariantContextUtils {
final GenotypeAssignmentMethod assignmentMethod,
final double[] newLikelihoods,
final List<Allele> allelesToUse) {
gb.noAD();
switch ( assignmentMethod ) {
case DO_NOT_ASSIGN_GENOTYPES:
break;
case SET_TO_NO_CALL:
gb.alleles(NO_CALL_ALLELES);
gb.noAD();
gb.noGQ();
break;
case USE_PLS_TO_ASSIGN:
gb.noAD();
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
// if there is no mass on the (new) likelihoods, then just no-call the sample
gb.alleles(NO_CALL_ALLELES);
@ -597,6 +664,7 @@ public class GATKVariantContextUtils {
}
gb.noGQ();
gb.noPL();
gb.noAD();
gb.alleles(best);
break;
}
@ -622,7 +690,7 @@ public class GATKVariantContextUtils {
if (oldGTs.isEmpty()) return oldGTs;
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create();
final GenotypesContext newGTs = GenotypesContext.create(oldGTs.size());
final Allele ref = vc.getReference();
final List<Allele> diploidRefAlleles = Arrays.asList(ref, ref);
@ -1083,7 +1151,7 @@ public class GATKVariantContextUtils {
}
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
final GenotypesContext newGs = GenotypesContext.create(genotypes.size());
for ( final Genotype g : genotypes ) {
newGs.add(removePLsAndAD(g));
@ -1092,6 +1160,108 @@ public class GATKVariantContextUtils {
return newGs;
}
/**
* Updates the PLs and AD of the Genotypes in the newly selected VariantContext to reflect the fact that some alleles
* from the original VariantContext are no longer present.
*
* @param selectedVC the selected (new) VariantContext
* @param originalVC the original VariantContext
* @return a new non-null GenotypesContext
*/
public static GenotypesContext updatePLsAndAD(final VariantContext selectedVC, final VariantContext originalVC) {
final int numNewAlleles = selectedVC.getAlleles().size();
final int numOriginalAlleles = originalVC.getAlleles().size();
// if we have more alternate alleles in the selected VC than in the original VC, then something is wrong
if ( numNewAlleles > numOriginalAlleles )
throw new IllegalArgumentException("Attempting to fix PLs and AD from what appears to be a *combined* VCF and not a selected one");
final GenotypesContext oldGs = selectedVC.getGenotypes();
// if we have the same number of alternate alleles in the selected VC as in the original VC, then we don't need to fix anything
if ( numNewAlleles == numOriginalAlleles )
return oldGs;
final GenotypesContext newGs = fixPLsFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
return fixADFromSubsettedAlleles(newGs, originalVC, selectedVC.getAlleles());
}
/**
* Fix the PLs for the GenotypesContext of a VariantContext that has been subset
*
* @param originalGs the original GenotypesContext
* @param originalVC the original VariantContext
* @param allelesToUse the new (sub)set of alleles to use
* @return a new non-null GenotypesContext
*/
static private GenotypesContext fixPLsFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse);
// create the new genotypes
return createGenotypesWithSubsettedLikelihoods(originalGs, originalVC, allelesToUse, likelihoodIndexesToUse, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES);
}
/**
* Fix the AD for the GenotypesContext of a VariantContext that has been subset
*
* @param originalGs the original GenotypesContext
* @param originalVC the original VariantContext
* @param allelesToUse the new (sub)set of alleles to use
* @return a new non-null GenotypesContext
*/
static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
// the bitset representing the allele indexes we want to keep
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
// the samples
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
// create the new genotypes
for ( int k = 0; k < originalGs.size(); k++ ) {
final Genotype g = originalGs.get(sampleIndices.get(k));
newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size()));
}
return newGTs;
}
/**
* Fix the AD for the given Genotype
*
* @param genotype the original Genotype
* @param alleleIndexesToUse a bitset describing whether or not to keep a given index
* @param nAllelesToUse how many alleles we are keeping
* @return a non-null Genotype
*/
private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) {
// if it ain't broke don't fix it
if ( !genotype.hasAD() )
return genotype;
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
final int[] oldAD = genotype.getAD();
if ( oldAD.length != alleleIndexesToUse.length ) {
builder.noAD();
} else {
final int[] newAD = new int[nAllelesToUse];
int currentIndex = 0;
for ( int i = 0; i < oldAD.length; i++ ) {
if ( alleleIndexesToUse[i] )
newAD[currentIndex++] = oldAD[i];
}
builder.AD(newAD);
}
return builder.make();
}
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
Allele ref = null;
@ -1277,7 +1447,7 @@ public class GATKVariantContextUtils {
@Requires("originalGenotypes != null && alleleMapper != null")
protected static GenotypesContext updateGenotypesWithMappedAlleles(final GenotypesContext originalGenotypes, final AlleleMapper alleleMapper) {
final GenotypesContext updatedGenotypes = GenotypesContext.create();
final GenotypesContext updatedGenotypes = GenotypesContext.create(originalGenotypes.size());
for ( final Genotype genotype : originalGenotypes ) {
final List<Allele> updatedAlleles = alleleMapper.remap(genotype.getAlleles());

View File

@ -1320,4 +1320,112 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
}
}
}
// --------------------------------------------------------------------------------
//
// Test updatePLsAndAD
//
// --------------------------------------------------------------------------------
@DataProvider(name = "updatePLsAndADData")
public Object[][] makeUpdatePLsAndADData() {
List<Object[]> tests = new ArrayList<>();
final Allele A = Allele.create("A", true);
final Allele C = Allele.create("C");
final Allele G = Allele.create("G");
final List<Allele> AA = Arrays.asList(A,A);
final List<Allele> AC = Arrays.asList(A,C);
final List<Allele> CC = Arrays.asList(C,C);
final List<Allele> AG = Arrays.asList(A,G);
final List<Allele> CG = Arrays.asList(C,G);
final List<Allele> GG = Arrays.asList(G,G);
final List<Allele> ACG = Arrays.asList(A,C,G);
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
final double[] uninformative = new double[]{0, 0, 0};
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
// make sure we don't screw up the simple case where no selection happens
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make();
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make();
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(aaGT).make())});
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(acGT).make())});
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(ccGT).make())});
// uninformative test cases
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make();
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
// actually subsetting down from multiple alt values
final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
final int[] homRef3AllelesAD = new int[]{20, 0, 1};
final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
final int[] homC3AllelesAD = new int[]{0, 20, 1};
final int[] hetRefG3AllelesAD = new int[]{10, 0, 11};
final int[] hetCG3AllelesAD = new int[]{0, 12, 11}; // AA, AC, CC, AG, CG, GG
final int[] homG3AllelesAD = new int[]{0, 1, 21}; // AA, AC, CC, AG, CG, GG
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).AD(new int[]{20, 0}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefC3AllelesAD).PL(hetRefC3AllelesPL).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-10, 0, -20}).AD(new int[]{10, 10}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homC3AllelesAD).PL(homC3AllelesPL).make()).make(),
new VariantContextBuilder(vcBase).alleles(AC).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -10, 0}).AD(new int[]{0, 20}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefG3AllelesAD).PL(hetRefG3AllelesPL).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}).GQ(100).make())});
tests.add(new Object[]{
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL).make()).make(),
new VariantContextBuilder(vcBase).alleles(AG).make(),
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}).GQ(100).make())});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "updatePLsAndADData")
public void testUpdatePLsAndADData(final VariantContext originalVC,
final VariantContext selectedVC,
final List<Genotype> expectedGenotypes) {
final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
final GenotypesContext actual = GATKVariantContextUtils.updatePLsAndAD(selectedVCwithGTs, originalVC);
Assert.assertEquals(actual.size(), expectedGenotypes.size());
for ( final Genotype expected : expectedGenotypes ) {
final Genotype actualGT = actual.get(expected.getSampleName());
Assert.assertNotNull(actualGT);
assertGenotypesAreEqual(actualGT, expected);
}
}
}