Merge pull request #440 from broadinstitute/eb_selection_should_keep_pls
Eb selection should keep pls
This commit is contained in:
commit
d90b295570
|
|
@ -316,7 +316,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b37KGReference + " -o %s --no_cmdline_in_header -sf " + samplesFile + " --excludeNonVariants --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("f14d75892b99547d8e9ba3a03bfb04ea")
|
||||
Arrays.asList("69862fb97e8e895fe65c7abb14b03cee")
|
||||
);
|
||||
executeTest("test select from multi allelic with excludeNonVariants --" + testfile, spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(doc="indel size select",required=false,fullName="maxIndelSize")
|
||||
private int maxIndelSize = Integer.MAX_VALUE;
|
||||
|
||||
@Argument(doc="Allow a samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
|
||||
@Argument(doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.",required=false,fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES")
|
||||
private boolean ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES = false;
|
||||
|
||||
|
||||
|
|
@ -657,6 +657,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
* Helper method to subset a VC record, modifying some metadata stored in the INFO field (i.e. AN, AC, AF).
|
||||
*
|
||||
* @param vc the VariantContext record to subset
|
||||
* @param excludeNonVariants should we exclude sites that have AC=0 for any alternate alleles?
|
||||
* @return the subsetted VariantContext
|
||||
*/
|
||||
private VariantContext subsetRecord(final VariantContext vc, final boolean excludeNonVariants) {
|
||||
|
|
@ -665,14 +666,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
final VariantContext sub = vc.subContextFromSamples(samples, excludeNonVariants); // strip out the alternate alleles that aren't being used
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(sub);
|
||||
|
||||
GenotypesContext newGC = sub.getGenotypes();
|
||||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
|
||||
final boolean lostAllelesInSelection = vc.getAlleles().size() != sub.getAlleles().size();
|
||||
if ( lostAllelesInSelection )
|
||||
newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
|
||||
// if there are fewer alternate alleles now in the selected VC, we need to fix the PL and AD values
|
||||
GenotypesContext newGC = GATKVariantContextUtils.updatePLsAndAD(sub, vc);
|
||||
|
||||
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
|
||||
if ( vc.getNSamples() != sub.getNSamples() ) {
|
||||
|
|
@ -682,11 +679,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
// Remove a fraction of the genotypes if needed
|
||||
if ( fractionGenotypes > 0 ){
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
final ArrayList<Genotype> genotypes = new ArrayList<>();
|
||||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
final List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
|
||||
}
|
||||
else{
|
||||
|
|
@ -698,7 +695,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
builder.genotypes(newGC);
|
||||
|
||||
addAnnotations(builder, sub, lostAllelesInSelection);
|
||||
addAnnotations(builder, sub, vc.getAlleles().size() != sub.getAlleles().size());
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -976,6 +976,16 @@ public class MathUtils {
|
|||
return count;
|
||||
}
|
||||
|
||||
public static int countOccurrences(final boolean element, final boolean[] array) {
|
||||
int count = 0;
|
||||
for (final boolean b : array) {
|
||||
if (element == b)
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns n random indices drawn with replacement from the range 0..(k-1)
|
||||
|
|
|
|||
|
|
@ -453,7 +453,12 @@ public class GATKVariantContextUtils {
|
|||
* rather than the undetermined behavior when using the PLs to assign, which could result
|
||||
* in hom-var or hom-ref for each, depending on the exact PL values.
|
||||
*/
|
||||
BEST_MATCH_TO_ORIGINAL
|
||||
BEST_MATCH_TO_ORIGINAL,
|
||||
|
||||
/**
|
||||
* do not even bother changing the GTs
|
||||
*/
|
||||
DO_NOT_ASSIGN_GENOTYPES
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -461,8 +466,8 @@ public class GATKVariantContextUtils {
|
|||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
|
||||
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
|
||||
* @return genotypes
|
||||
* @param assignGenotypes assignment strategy for the (subsetted) PLs
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
|
|
@ -470,50 +475,109 @@ public class GATKVariantContextUtils {
|
|||
if ( allelesToUse.get(0).isNonReference() ) throw new IllegalArgumentException("First allele must be the reference allele");
|
||||
if ( allelesToUse.size() == 1 ) throw new IllegalArgumentException("Cannot subset to only 1 alt allele");
|
||||
|
||||
// the genotypes with PLs
|
||||
final GenotypesContext oldGTs = vc.getGenotypes();
|
||||
|
||||
// the new genotypes to create
|
||||
final GenotypesContext newGTs = GenotypesContext.create();
|
||||
|
||||
// optimization: if no input genotypes, just exit
|
||||
if (oldGTs.isEmpty()) return newGTs;
|
||||
|
||||
// samples
|
||||
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
|
||||
if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
|
||||
|
||||
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
|
||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||
final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse);
|
||||
|
||||
// which PLs should be carried forward?
|
||||
ArrayList<Integer> likelihoodIndexesToUse = null;
|
||||
// create the new genotypes
|
||||
return createGenotypesWithSubsettedLikelihoods(vc.getGenotypes(), vc, allelesToUse, likelihoodIndexesToUse, assignGenotypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Figure out which likelihood indexes to use for a selected down set of alleles
|
||||
*
|
||||
* @param originalVC the original VariantContext
|
||||
* @param allelesToUse the subset of alleles to use
|
||||
* @return a list of PL indexes to use or null if none
|
||||
*/
|
||||
private static List<Integer> determineLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||
|
||||
// the bitset representing the allele indexes we want to keep
|
||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
||||
|
||||
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
||||
// then we can keep the PLs as is; otherwise, we determine which ones to keep
|
||||
if ( numNewAltAlleles != numOriginalAltAlleles ) {
|
||||
likelihoodIndexesToUse = new ArrayList<>(30);
|
||||
if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length )
|
||||
return null;
|
||||
|
||||
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
||||
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
|
||||
altAlleleIndexToUse[i] = true;
|
||||
}
|
||||
return getLikelihoodIndexes(originalVC, alleleIndexesToUse);
|
||||
}
|
||||
|
||||
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
|
||||
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
|
||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
// consider this entry only if both of the alleles are good
|
||||
if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
|
||||
likelihoodIndexesToUse.add(PLindex);
|
||||
}
|
||||
/**
|
||||
* Get the actual likelihoods indexes to use given the corresponding allele indexes
|
||||
*
|
||||
* @param originalVC the original VariantContext
|
||||
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
|
||||
* @return a non-null List
|
||||
*/
|
||||
private static List<Integer> getLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) {
|
||||
|
||||
final List<Integer> result = new ArrayList<>(30);
|
||||
|
||||
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
|
||||
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);
|
||||
|
||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
// consider this entry only if both of the alleles are good
|
||||
if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] )
|
||||
result.add(PLindex);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an original VariantContext and a list of alleles from that VC to keep,
|
||||
* returns a bitset representing which allele indexes should be kept
|
||||
*
|
||||
* @param originalVC the original VC
|
||||
* @param allelesToKeep the list of alleles to keep
|
||||
* @return non-null bitset
|
||||
*/
|
||||
private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToKeep) {
|
||||
final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
|
||||
final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1];
|
||||
|
||||
// the reference Allele is definitely still used
|
||||
alleleIndexesToKeep[0] = true;
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
||||
if ( allelesToKeep.contains(originalVC.getAlternateAllele(i)) )
|
||||
alleleIndexesToKeep[i+1] = true;
|
||||
}
|
||||
|
||||
return alleleIndexesToKeep;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the new GenotypesContext with the subsetted PLs
|
||||
*
|
||||
* @param originalGs the original GenotypesContext
|
||||
* @param vc the original VariantContext
|
||||
* @param allelesToUse the actual alleles to use with the new Genotypes
|
||||
* @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineLikelihoodIndexesToUse())
|
||||
* @param assignGenotypes assignment strategy for the (subsetted) PLs
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
|
||||
final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final List<Integer> likelihoodIndexesToUse,
|
||||
final GenotypeAssignmentMethod assignGenotypes) {
|
||||
// the new genotypes to create
|
||||
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
|
||||
|
||||
// make sure we are seeing the expected number of likelihoods per sample
|
||||
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
|
||||
|
||||
// the samples
|
||||
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
|
||||
|
||||
// create the new genotypes
|
||||
for ( int k = 0; k < oldGTs.size(); k++ ) {
|
||||
final Genotype g = oldGTs.get(sampleIndices.get(k));
|
||||
for ( int k = 0; k < originalGs.size(); k++ ) {
|
||||
final Genotype g = originalGs.get(sampleIndices.get(k));
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
|
||||
// create the new likelihoods array from the alleles we are allowed to use
|
||||
|
|
@ -570,13 +634,16 @@ public class GATKVariantContextUtils {
|
|||
final GenotypeAssignmentMethod assignmentMethod,
|
||||
final double[] newLikelihoods,
|
||||
final List<Allele> allelesToUse) {
|
||||
gb.noAD();
|
||||
switch ( assignmentMethod ) {
|
||||
case DO_NOT_ASSIGN_GENOTYPES:
|
||||
break;
|
||||
case SET_TO_NO_CALL:
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
gb.noAD();
|
||||
gb.noGQ();
|
||||
break;
|
||||
case USE_PLS_TO_ASSIGN:
|
||||
gb.noAD();
|
||||
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
|
|
@ -597,6 +664,7 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
gb.noGQ();
|
||||
gb.noPL();
|
||||
gb.noAD();
|
||||
gb.alleles(best);
|
||||
break;
|
||||
}
|
||||
|
|
@ -622,7 +690,7 @@ public class GATKVariantContextUtils {
|
|||
if (oldGTs.isEmpty()) return oldGTs;
|
||||
|
||||
// the new genotypes to create
|
||||
final GenotypesContext newGTs = GenotypesContext.create();
|
||||
final GenotypesContext newGTs = GenotypesContext.create(oldGTs.size());
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
final List<Allele> diploidRefAlleles = Arrays.asList(ref, ref);
|
||||
|
|
@ -1083,7 +1151,7 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
|
||||
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
|
||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
final GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
newGs.add(removePLsAndAD(g));
|
||||
|
|
@ -1092,6 +1160,108 @@ public class GATKVariantContextUtils {
|
|||
return newGs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the PLs and AD of the Genotypes in the newly selected VariantContext to reflect the fact that some alleles
|
||||
* from the original VariantContext are no longer present.
|
||||
*
|
||||
* @param selectedVC the selected (new) VariantContext
|
||||
* @param originalVC the original VariantContext
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
public static GenotypesContext updatePLsAndAD(final VariantContext selectedVC, final VariantContext originalVC) {
|
||||
final int numNewAlleles = selectedVC.getAlleles().size();
|
||||
final int numOriginalAlleles = originalVC.getAlleles().size();
|
||||
|
||||
// if we have more alternate alleles in the selected VC than in the original VC, then something is wrong
|
||||
if ( numNewAlleles > numOriginalAlleles )
|
||||
throw new IllegalArgumentException("Attempting to fix PLs and AD from what appears to be a *combined* VCF and not a selected one");
|
||||
|
||||
final GenotypesContext oldGs = selectedVC.getGenotypes();
|
||||
|
||||
// if we have the same number of alternate alleles in the selected VC as in the original VC, then we don't need to fix anything
|
||||
if ( numNewAlleles == numOriginalAlleles )
|
||||
return oldGs;
|
||||
|
||||
final GenotypesContext newGs = fixPLsFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
|
||||
|
||||
return fixADFromSubsettedAlleles(newGs, originalVC, selectedVC.getAlleles());
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix the PLs for the GenotypesContext of a VariantContext that has been subset
|
||||
*
|
||||
* @param originalGs the original GenotypesContext
|
||||
* @param originalVC the original VariantContext
|
||||
* @param allelesToUse the new (sub)set of alleles to use
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
static private GenotypesContext fixPLsFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||
|
||||
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
|
||||
final List<Integer> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse);
|
||||
|
||||
// create the new genotypes
|
||||
return createGenotypesWithSubsettedLikelihoods(originalGs, originalVC, allelesToUse, likelihoodIndexesToUse, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix the AD for the GenotypesContext of a VariantContext that has been subset
|
||||
*
|
||||
* @param originalGs the original GenotypesContext
|
||||
* @param originalVC the original VariantContext
|
||||
* @param allelesToUse the new (sub)set of alleles to use
|
||||
* @return a new non-null GenotypesContext
|
||||
*/
|
||||
static private GenotypesContext fixADFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||
|
||||
// the bitset representing the allele indexes we want to keep
|
||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
||||
|
||||
// the new genotypes to create
|
||||
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
|
||||
|
||||
// the samples
|
||||
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
|
||||
|
||||
// create the new genotypes
|
||||
for ( int k = 0; k < originalGs.size(); k++ ) {
|
||||
final Genotype g = originalGs.get(sampleIndices.get(k));
|
||||
newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size()));
|
||||
}
|
||||
|
||||
return newGTs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix the AD for the given Genotype
|
||||
*
|
||||
* @param genotype the original Genotype
|
||||
* @param alleleIndexesToUse a bitset describing whether or not to keep a given index
|
||||
* @param nAllelesToUse how many alleles we are keeping
|
||||
* @return a non-null Genotype
|
||||
*/
|
||||
private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) {
|
||||
// if it ain't broke don't fix it
|
||||
if ( !genotype.hasAD() )
|
||||
return genotype;
|
||||
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
||||
|
||||
final int[] oldAD = genotype.getAD();
|
||||
if ( oldAD.length != alleleIndexesToUse.length ) {
|
||||
builder.noAD();
|
||||
} else {
|
||||
final int[] newAD = new int[nAllelesToUse];
|
||||
int currentIndex = 0;
|
||||
for ( int i = 0; i < oldAD.length; i++ ) {
|
||||
if ( alleleIndexesToUse[i] )
|
||||
newAD[currentIndex++] = oldAD[i];
|
||||
}
|
||||
builder.AD(newAD);
|
||||
}
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
|
||||
Allele ref = null;
|
||||
|
||||
|
|
@ -1277,7 +1447,7 @@ public class GATKVariantContextUtils {
|
|||
|
||||
@Requires("originalGenotypes != null && alleleMapper != null")
|
||||
protected static GenotypesContext updateGenotypesWithMappedAlleles(final GenotypesContext originalGenotypes, final AlleleMapper alleleMapper) {
|
||||
final GenotypesContext updatedGenotypes = GenotypesContext.create();
|
||||
final GenotypesContext updatedGenotypes = GenotypesContext.create(originalGenotypes.size());
|
||||
|
||||
for ( final Genotype genotype : originalGenotypes ) {
|
||||
final List<Allele> updatedAlleles = alleleMapper.remap(genotype.getAlleles());
|
||||
|
|
|
|||
|
|
@ -1320,4 +1320,112 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test updatePLsAndAD
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "updatePLsAndADData")
|
||||
public Object[][] makeUpdatePLsAndADData() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
final Allele A = Allele.create("A", true);
|
||||
final Allele C = Allele.create("C");
|
||||
final Allele G = Allele.create("G");
|
||||
|
||||
final List<Allele> AA = Arrays.asList(A,A);
|
||||
final List<Allele> AC = Arrays.asList(A,C);
|
||||
final List<Allele> CC = Arrays.asList(C,C);
|
||||
final List<Allele> AG = Arrays.asList(A,G);
|
||||
final List<Allele> CG = Arrays.asList(C,G);
|
||||
final List<Allele> GG = Arrays.asList(G,G);
|
||||
final List<Allele> ACG = Arrays.asList(A,C,G);
|
||||
|
||||
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
|
||||
|
||||
final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
|
||||
final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
|
||||
final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
|
||||
final double[] uninformative = new double[]{0, 0, 0};
|
||||
|
||||
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
|
||||
|
||||
// make sure we don't screw up the simple case where no selection happens
|
||||
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make();
|
||||
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make();
|
||||
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make();
|
||||
|
||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(aaGT).make())});
|
||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(acGT).make())});
|
||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(new GenotypeBuilder(ccGT).make())});
|
||||
|
||||
// uninformative test cases
|
||||
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
|
||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
|
||||
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make();
|
||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
|
||||
|
||||
// actually subsetting down from multiple alt values
|
||||
final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
|
||||
final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
|
||||
final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
|
||||
final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
|
||||
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
|
||||
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
|
||||
|
||||
final int[] homRef3AllelesAD = new int[]{20, 0, 1};
|
||||
final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
|
||||
final int[] homC3AllelesAD = new int[]{0, 20, 1};
|
||||
final int[] hetRefG3AllelesAD = new int[]{10, 0, 11};
|
||||
final int[] hetCG3AllelesAD = new int[]{0, 12, 11}; // AA, AC, CC, AG, CG, GG
|
||||
final int[] homG3AllelesAD = new int[]{0, 1, 21}; // AA, AC, CC, AG, CG, GG
|
||||
|
||||
tests.add(new Object[]{
|
||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).make()).make(),
|
||||
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).AD(new int[]{20, 0}).GQ(100).make())});
|
||||
|
||||
tests.add(new Object[]{
|
||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefC3AllelesAD).PL(hetRefC3AllelesPL).make()).make(),
|
||||
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-10, 0, -20}).AD(new int[]{10, 10}).GQ(100).make())});
|
||||
|
||||
tests.add(new Object[]{
|
||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homC3AllelesAD).PL(homC3AllelesPL).make()).make(),
|
||||
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -10, 0}).AD(new int[]{0, 20}).GQ(100).make())});
|
||||
tests.add(new Object[]{
|
||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetRefG3AllelesAD).PL(hetRefG3AllelesPL).make()).make(),
|
||||
new VariantContextBuilder(vcBase).alleles(AG).make(),
|
||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, 0, -50}).AD(new int[]{10, 11}).GQ(100).make())});
|
||||
|
||||
tests.add(new Object[]{
|
||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(hetCG3AllelesAD).PL(hetCG3AllelesPL).make()).make(),
|
||||
new VariantContextBuilder(vcBase).alleles(AG).make(),
|
||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -20, -30}).AD(new int[]{0, 11}).GQ(100).make())});
|
||||
|
||||
tests.add(new Object[]{
|
||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homG3AllelesAD).PL(homG3AllelesPL).make()).make(),
|
||||
new VariantContextBuilder(vcBase).alleles(AG).make(),
|
||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{-20, -40, 0}).AD(new int[]{0, 21}).GQ(100).make())});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "updatePLsAndADData")
|
||||
public void testUpdatePLsAndADData(final VariantContext originalVC,
|
||||
final VariantContext selectedVC,
|
||||
final List<Genotype> expectedGenotypes) {
|
||||
final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
|
||||
final GenotypesContext actual = GATKVariantContextUtils.updatePLsAndAD(selectedVCwithGTs, originalVC);
|
||||
|
||||
Assert.assertEquals(actual.size(), expectedGenotypes.size());
|
||||
for ( final Genotype expected : expectedGenotypes ) {
|
||||
final Genotype actualGT = actual.get(expected.getSampleName());
|
||||
Assert.assertNotNull(actualGT);
|
||||
assertGenotypesAreEqual(actualGT, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue