diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 4b1483cb6..884b46692 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -316,7 +316,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile, 1, - Arrays.asList("f14d75892b99547d8e9ba3a03bfb04ea") + Arrays.asList("69862fb97e8e895fe65c7abb14b03cee") ); executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 1f2b6d09b..e688b7f17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -299,7 +299,7 @@ public class SelectVariants extends RodWalker implements TreeR @Argument(doc="indel size select",required=false,fullName="maxIndelSize") private int maxIndelSize = Integer.MAX_VALUE; - @Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES") + @Argument(doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES") private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false; @@ -657,6 +657,7 @@ public class SelectVariants extends RodWalker implements TreeR * Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF). * * @param vc the VariantContext record to subset + * @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles? * @return the subsetted VariantContext */ private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) { @@ -665,14 +666,10 @@ public class SelectVariants extends RodWalker implements TreeR final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used - VariantContextBuilder builder = new VariantContextBuilder(sub); + final VariantContextBuilder builder = new VariantContextBuilder(sub); - GenotypesContext newGC = sub.getGenotypes(); - - // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate) - final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size(); - if ( lostAllelesInSelection ) - newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes()); + // if there are fewer alternate alleles now in the selected VC, we need to fix the PL and AD values + GenotypesContext newGC = GATKVariantContextUtils.updatePLsAndAD(sub, vc); // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags if ( vc.getNSamples() != sub.getNSamples() ) { @@ -682,11 +679,11 @@ public class SelectVariants extends RodWalker implements TreeR // Remove a fraction of the genotypes if needed if ( fractionGenotypes > 0 ){ - ArrayList genotypes = new ArrayList(); + final ArrayList genotypes = new ArrayList<>(); for ( Genotype genotype : newGC ) { //Set genotype to no call if it falls in the fraction. if(fractionGenotypes>0 && randomGenotypes.nextDouble() alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + final List alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make()); } else{ @@ -698,7 +695,7 @@ public class SelectVariants extends RodWalker implements TreeR builder.genotypes(newGC); - addAnnotations(builder, sub, lostAllelesInSelection); + addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size()); return builder.make(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 06d3d8fd8..0b6cd4b1d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -976,6 +976,16 @@ public class MathUtils { return count; } + public static int countOccurrences(final boolean element, final boolean[] array) { + int count = 0; + for (final boolean b : array) { + if (element == b) + count++; + } + + return count; + } + /** * Returns n random indices drawn with replacement from the range 0..(k-1) diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 03bb9763c..1ae34e268 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -453,7 +453,12 @@ public class GATKVariantContextUtils { * rather than the undetermined behavior when using the PLs to assign, which could result * in hom-var or hom-ref for each, depending on the exact PL values. */ - BEST_MATCH_TO_ORIGINAL + BEST_MATCH_TO_ORIGINAL, + + /** + * do not even bother changing the GTs + */ + DO_NOT_ASSIGN_GENOTYPES } /** @@ -461,8 +466,8 @@ public class GATKVariantContextUtils { * * @param vc variant context with genotype likelihoods * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC *** - * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs - * @return genotypes + * @param assignGenotypes assignment strategy for the (subsetted) PLs + * @return a new non-null GenotypesContext */ public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, final List allelesToUse, @@ -470,50 +475,109 @@ public class GATKVariantContextUtils { if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele"); if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele"); - // the genotypes with PLs - final GenotypesContext oldGTs = vc.getGenotypes(); - - // the new genotypes to create - final GenotypesContext newGTs = GenotypesContext.create(); - // optimization: if no input genotypes, just exit - if (oldGTs.isEmpty()) return newGTs; - - // samples - final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); + if (vc.getGenotypes().isEmpty()) return GenotypesContext.create(); // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward - final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); - final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2); - final int numNewAltAlleles = allelesToUse.size() - 1; + final List likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse); - // which PLs should be carried forward? - ArrayList likelihoodIndexesToUse = null; + // create the new genotypes + return createGenotypesWithSubsettedLikelihoods(vc.getGenotypes(), vc, allelesToUse, likelihoodIndexesToUse, assignGenotypes); + } + + /** + * Figure out which likelihood indexes to use for a selected down set of alleles + * + * @param originalVC the original VariantContext + * @param allelesToUse the subset of alleles to use + * @return a list of PL indexes to use or null if none + */ + private static List determineLikelihoodIndexesToUse(final VariantContext originalVC, final List allelesToUse) { + + // the bitset representing the allele indexes we want to keep + final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse); // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, // then we can keep the PLs as is; otherwise, we determine which ones to keep - if ( numNewAltAlleles != numOriginalAltAlleles ) { - likelihoodIndexesToUse = new ArrayList<>(30); + if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length ) + return null; - final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; - for ( int i = 0; i < numOriginalAltAlleles; i++ ) { - if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) - altAlleleIndexToUse[i] = true; - } + return getLikelihoodIndexes(originalVC, alleleIndexesToUse); + } - // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2 - final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY); - for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { - final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - // consider this entry only if both of the alleles are good - if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) ) - likelihoodIndexesToUse.add(PLindex); - } + /** + * Get the actual likelihoods indexes to use given the corresponding allele indexes + * + * @param originalVC the original VariantContext + * @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset) + * @return a non-null List + */ + private static List getLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) { + + final List result = new ArrayList<>(30); + + // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2 + final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY); + + for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { + final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + // consider this entry only if both of the alleles are good + if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] ) + result.add(PLindex); } + return result; + } + + /** + * Given an original VariantContext and a list of alleles from that VC to keep, + * returns a bitset representing which allele indexes should be kept + * + * @param originalVC the original VC + * @param allelesToKeep the list of alleles to keep + * @return non-null bitset + */ + private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List allelesToKeep) { + final int numOriginalAltAlleles = originalVC.getNAlleles() - 1; + final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1]; + + // the reference Allele is definitely still used + alleleIndexesToKeep[0] = true; + for ( int i = 0; i < numOriginalAltAlleles; i++ ) { + if ( allelesToKeep.contains(originalVC.getAlternateAllele(i)) ) + alleleIndexesToKeep[i+1] = true; + } + + return alleleIndexesToKeep; + } + + /** + * Create the new GenotypesContext with the subsetted PLs + * + * @param originalGs the original GenotypesContext + * @param vc the original VariantContext + * @param allelesToUse the actual alleles to use with the new Genotypes + * @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineLikelihoodIndexesToUse()) + * @param assignGenotypes assignment strategy for the (subsetted) PLs + * @return a new non-null GenotypesContext + */ + private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs, + final VariantContext vc, + final List allelesToUse, + final List likelihoodIndexesToUse, + final GenotypeAssignmentMethod assignGenotypes) { + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(originalGs.size()); + + // make sure we are seeing the expected number of likelihoods per sample + final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2); + + // the samples + final List sampleIndices = originalGs.getSampleNamesOrderedByName(); + // create the new genotypes - for ( int k = 0; k < oldGTs.size(); k++ ) { - final Genotype g = oldGTs.get(sampleIndices.get(k)); + for ( int k = 0; k < originalGs.size(); k++ ) { + final Genotype g = originalGs.get(sampleIndices.get(k)); final GenotypeBuilder gb = new GenotypeBuilder(g); // create the new likelihoods array from the alleles we are allowed to use @@ -570,13 +634,16 @@ public class GATKVariantContextUtils { final GenotypeAssignmentMethod assignmentMethod, final double[] newLikelihoods, final List allelesToUse) { - gb.noAD(); switch ( assignmentMethod ) { + case DO_NOT_ASSIGN_GENOTYPES: + break; case SET_TO_NO_CALL: gb.alleles(NO_CALL_ALLELES); + gb.noAD(); gb.noGQ(); break; case USE_PLS_TO_ASSIGN: + gb.noAD(); if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) { // if there is no mass on the (new) likelihoods, then just no-call the sample gb.alleles(NO_CALL_ALLELES); @@ -597,6 +664,7 @@ public class GATKVariantContextUtils { } gb.noGQ(); gb.noPL(); + gb.noAD(); gb.alleles(best); break; } @@ -622,7 +690,7 @@ public class GATKVariantContextUtils { if (oldGTs.isEmpty()) return oldGTs; // the new genotypes to create - final GenotypesContext newGTs = GenotypesContext.create(); + final GenotypesContext newGTs = GenotypesContext.create(oldGTs.size()); final Allele ref = vc.getReference(); final List diploidRefAlleles = Arrays.asList(ref, ref); @@ -1083,7 +1151,7 @@ public class GATKVariantContextUtils { } public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) { - GenotypesContext newGs = GenotypesContext.create(genotypes.size()); + final GenotypesContext newGs = GenotypesContext.create(genotypes.size()); for ( final Genotype g : genotypes ) { newGs.add(removePLsAndAD(g)); @@ -1092,6 +1160,108 @@ public class GATKVariantContextUtils { return newGs; } + /** + * Updates the PLs and AD of the Genotypes in the newly selected VariantContext to reflect the fact that some alleles + * from the original VariantContext are no longer present. + * + * @param selectedVC the selected (new) VariantContext + * @param originalVC the original VariantContext + * @return a new non-null GenotypesContext + */ + public static GenotypesContext updatePLsAndAD(final VariantContext selectedVC, final VariantContext originalVC) { + final int numNewAlleles = selectedVC.getAlleles().size(); + final int numOriginalAlleles = originalVC.getAlleles().size(); + + // if we have more alternate alleles in the selected VC than in the original VC, then something is wrong + if ( numNewAlleles > numOriginalAlleles ) + throw new IllegalArgumentException("Attempting to fix PLs and AD from what appears to be a *combined* VCF and not a selected one"); + + final GenotypesContext oldGs = selectedVC.getGenotypes(); + + // if we have the same number of alternate alleles in the selected VC as in the original VC, then we don't need to fix anything + if ( numNewAlleles == numOriginalAlleles ) + return oldGs; + + final GenotypesContext newGs = fixPLsFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles()); + + return fixADFromSubsettedAlleles(newGs, originalVC, selectedVC.getAlleles()); + } + + /** + * Fix the PLs for the GenotypesContext of a VariantContext that has been subset + * + * @param originalGs the original GenotypesContext + * @param originalVC the original VariantContext + * @param allelesToUse the new (sub)set of alleles to use + * @return a new non-null GenotypesContext + */ + static private GenotypesContext fixPLsFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List allelesToUse) { + + // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward + final List likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse); + + // create the new genotypes + return createGenotypesWithSubsettedLikelihoods(originalGs, originalVC, allelesToUse, likelihoodIndexesToUse, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES); + } + + /** + * Fix the AD for the GenotypesContext of a VariantContext that has been subset + * + * @param originalGs the original GenotypesContext + * @param originalVC the original VariantContext + * @param allelesToUse the new (sub)set of alleles to use + * @return a new non-null GenotypesContext + */ + static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List allelesToUse) { + + // the bitset representing the allele indexes we want to keep + final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse); + + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(originalGs.size()); + + // the samples + final List sampleIndices = originalGs.getSampleNamesOrderedByName(); + + // create the new genotypes + for ( int k = 0; k < originalGs.size(); k++ ) { + final Genotype g = originalGs.get(sampleIndices.get(k)); + newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size())); + } + + return newGTs; + } + + /** + * Fix the AD for the given Genotype + * + * @param genotype the original Genotype + * @param alleleIndexesToUse a bitset describing whether or not to keep a given index + * @param nAllelesToUse how many alleles we are keeping + * @return a non-null Genotype + */ + private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) { + // if it ain't broke don't fix it + if ( !genotype.hasAD() ) + return genotype; + + final GenotypeBuilder builder = new GenotypeBuilder(genotype); + + final int[] oldAD = genotype.getAD(); + if ( oldAD.length != alleleIndexesToUse.length ) { + builder.noAD(); + } else { + final int[] newAD = new int[nAllelesToUse]; + int currentIndex = 0; + for ( int i = 0; i < oldAD.length; i++ ) { + if ( alleleIndexesToUse[i] ) + newAD[currentIndex++] = oldAD[i]; + } + builder.AD(newAD); + } + return builder.make(); + } + static private Allele determineReferenceAllele(List VCs) { Allele ref = null; @@ -1277,7 +1447,7 @@ public class GATKVariantContextUtils { @Requires("originalGenotypes != null && alleleMapper != null") protected static GenotypesContext updateGenotypesWithMappedAlleles(final GenotypesContext originalGenotypes, final AlleleMapper alleleMapper) { - final GenotypesContext updatedGenotypes = GenotypesContext.create(); + final GenotypesContext updatedGenotypes = GenotypesContext.create(originalGenotypes.size()); for ( final Genotype genotype : originalGenotypes ) { final List updatedAlleles = alleleMapper.remap(genotype.getAlleles()); diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index 575fe4936..30f112241 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -1320,4 +1320,112 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { } } } + + // -------------------------------------------------------------------------------- + // + // Test updatePLsAndAD + // + // -------------------------------------------------------------------------------- + + @DataProvider(name = "updatePLsAndADData") + public Object[][] makeUpdatePLsAndADData() { + List tests = new ArrayList<>(); + + final Allele A = Allele.create("A", true); + final Allele C = Allele.create("C"); + final Allele G = Allele.create("G"); + + final List AA = Arrays.asList(A,A); + final List AC = Arrays.asList(A,C); + final List CC = Arrays.asList(C,C); + final List AG = Arrays.asList(A,G); + final List CG = Arrays.asList(C,G); + final List GG = Arrays.asList(G,G); + final List ACG = Arrays.asList(A,C,G); + + final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make(); + + final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01}); + final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01}); + final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9}); + final double[] uninformative = new double[]{0, 0, 0}; + + final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make(); + + // make sure we don't screw up the simple case where no selection happens + final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make(); + final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make(); + final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make(); + + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(aaGT).make())}); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(acGT).make())}); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(ccGT).make())}); + + // uninformative test cases + final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make(); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)}); + final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make(); + tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)}); + + // actually subsetting down from multiple alt values + final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50}; + final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50}; + final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50}; + final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50}; + final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG + final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG + + final int[] homRef3AllelesAD = new int[]{20, 0, 1}; + final int[] hetRefC3AllelesAD = new int[]{10, 10, 1}; + final int[] homC3AllelesAD = new int[]{0, 20, 1}; + final int[] hetRefG3AllelesAD = new int[]{10, 0, 11}; + final int[] hetCG3AllelesAD = new int[]{0, 12, 11}; // AA, AC, CC, AG, CG, GG + final int[] homG3AllelesAD = new int[]{0, 1, 21}; // AA, AC, CC, AG, CG, GG + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).make()).make(), + new VariantContextBuilder(vcBase).alleles(AC).make(), + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).AD(new int[]{20, 0}).GQ(100).make())}); + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefC3AllelesAD).PL(hetRefC3AllelesPL).make()).make(), + new VariantContextBuilder(vcBase).alleles(AC).make(), + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-10, 0, -20}).AD(new int[]{10, 10}).GQ(100).make())}); + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homC3AllelesAD).PL(homC3AllelesPL).make()).make(), + new VariantContextBuilder(vcBase).alleles(AC).make(), + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -10, 0}).AD(new int[]{0, 20}).GQ(100).make())}); + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefG3AllelesAD).PL(hetRefG3AllelesPL).make()).make(), + new VariantContextBuilder(vcBase).alleles(AG).make(), + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}).GQ(100).make())}); + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL).make()).make(), + new VariantContextBuilder(vcBase).alleles(AG).make(), + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}).GQ(100).make())}); + + tests.add(new Object[]{ + new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL).make()).make(), + new VariantContextBuilder(vcBase).alleles(AG).make(), + Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}).GQ(100).make())}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "updatePLsAndADData") + public void testUpdatePLsAndADData(final VariantContext originalVC, + final VariantContext selectedVC, + final List expectedGenotypes) { + final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make(); + final GenotypesContext actual = GATKVariantContextUtils.updatePLsAndAD(selectedVCwithGTs, originalVC); + + Assert.assertEquals(actual.size(), expectedGenotypes.size()); + for ( final Genotype expected : expectedGenotypes ) { + final Genotype actualGT = actual.get(expected.getSampleName()); + Assert.assertNotNull(actualGT); + assertGenotypesAreEqual(actualGT, expected); + } + } } \ No newline at end of file