SelectVariants works with non-diploids
This commit is contained in:
parent
d3862459e9
commit
427645162b
|
|
@ -110,7 +110,7 @@ public abstract class DiploidExactAFCalculator extends ExactAFCalculator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
|
protected GenotypesContext reduceScopeGenotypes(final VariantContext vc, final int defaultPloidy, final List<Allele> allelesToUse) {
|
||||||
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
|
return GATKVariantContextUtils.subsetAlleles(vc, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -346,8 +346,8 @@ public abstract class DiploidExactAFCalculator extends ExactAFCalculator {
|
||||||
if (defaultPloidy != 2)
|
if (defaultPloidy != 2)
|
||||||
throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy);
|
throw new IllegalArgumentException("cannot support ploidy different than 2 and the default ploidy is " + defaultPloidy);
|
||||||
return allelesToUse.size() == 1
|
return allelesToUse.size() == 1
|
||||||
? GATKVariantContextUtils.subsetToRefOnly(vc, 2)
|
? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy)
|
||||||
: GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse,
|
: GATKVariantContextUtils.subsetAlleles(vc, allelesToUse,
|
||||||
assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
|
assignGenotypes ? GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN : GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@ public class FamilyLikelihoodsUtils {
|
||||||
//final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
|
//final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
|
||||||
|
|
||||||
//update genotype types based on posteriors
|
//update genotype types based on posteriors
|
||||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), builder,
|
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), genotype.getPloidy(), builder,
|
||||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
|
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
|
||||||
|
|
||||||
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
|
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
|
||||||
|
|
|
||||||
|
|
@ -153,7 +153,7 @@ public class PosteriorLikelihoodsUtils {
|
||||||
final GenotypeBuilder builder = new GenotypeBuilder(vc1.getGenotype(genoIdx));
|
final GenotypeBuilder builder = new GenotypeBuilder(vc1.getGenotype(genoIdx));
|
||||||
builder.phased(vc1.getGenotype(genoIdx).isPhased());
|
builder.phased(vc1.getGenotype(genoIdx).isPhased());
|
||||||
if ( posteriors.get(genoIdx) != null ) {
|
if ( posteriors.get(genoIdx) != null ) {
|
||||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc1.getAlleles(), builder,
|
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc1.getAlleles(), vc1.getMaxPloidy(2), builder,
|
||||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles());
|
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, posteriors.get(genoIdx), vc1.getAlleles());
|
||||||
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
|
builder.attribute(GATKVCFConstants.PHRED_SCALED_POSTERIORS_KEY,
|
||||||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs()));
|
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(posteriors.get(genoIdx)).getAsPLs()));
|
||||||
|
|
|
||||||
|
|
@ -304,7 +304,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b36KGReference + " -selectType INDEL --variant " + testFile + " -o %s --no_cmdline_in_header --minIndelSize 2",
|
"-T SelectVariants -R " + b36KGReference + " -selectType INDEL --variant " + testFile + " -o %s --no_cmdline_in_header --minIndelSize 2",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("ed9dc00d0551630a2eed9e81a2a357d3")
|
Arrays.asList("ad0965edb1dbd30060afd21ba9f11bf7")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testMinIndelLengthSelection--" + testFile, spec);
|
executeTest("testMinIndelLengthSelection--" + testFile, spec);
|
||||||
|
|
@ -395,7 +395,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
"-T SelectVariants -R " + b37KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("c78a65b41edbdd386211042e8f65220b")
|
Arrays.asList("1fc77d7f47e75a24222a358c69de7f3d")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testNoGTs--" + testFile, spec);
|
executeTest("testNoGTs--" + testFile, spec);
|
||||||
|
|
@ -606,7 +606,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -IDs " + idFile + " --variant " + testFile),
|
baseTestString(" -IDs " + idFile + " --variant " + testFile),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("c6632b63617162455f02670174a2322a")
|
Arrays.asList("da1117cba380345c622a6d8e52c2270b")
|
||||||
);
|
);
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testKeepSelectionID--" + testFile, spec);
|
executeTest("testKeepSelectionID--" + testFile, spec);
|
||||||
|
|
@ -641,7 +641,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b36KGReference + " -xlSelectType SNP --variant " + testFile + " -o %s --no_cmdline_in_header",
|
"-T SelectVariants -R " + b36KGReference + " -xlSelectType SNP --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("ed9dc00d0551630a2eed9e81a2a357d3")
|
Arrays.asList("ad0965edb1dbd30060afd21ba9f11bf7")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testExcludeSelectionType--" + testFile, spec);
|
executeTest("testExcludeSelectionType--" + testFile, spec);
|
||||||
|
|
@ -782,7 +782,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates",
|
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12891 -trimAlternates",
|
||||||
1,
|
1,
|
||||||
ReviewedGATKException.class);
|
Arrays.asList("7880f8a1dfae1804998b6a994574e734"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testSACNonDiploid", spec);
|
executeTest("testSACNonDiploid", spec);
|
||||||
}
|
}
|
||||||
|
|
@ -838,4 +838,43 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
executeTest("testMaxNoCall0_5", spec);
|
executeTest("testMaxNoCall0_5", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHaploid() {
|
||||||
|
final String testfile = privateTestDir + "haploid-multisample.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn HG00610 -select 'DP > 7'",
|
||||||
|
1,
|
||||||
|
Arrays.asList("bc6caa81836f4c94a1216babd0c1ac72"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
|
||||||
|
executeTest("testHaploid", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTetraploid() {
|
||||||
|
final String testfile = privateTestDir + "tetraploid-multisample.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA18486 -select 'DP > 19'",
|
||||||
|
1,
|
||||||
|
Arrays.asList("4fcfa5e0ba5d39ca9f0593aa0c0f7a63"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
|
||||||
|
executeTest("testTetraploid", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTetraDiploid() {
|
||||||
|
final String testfile = privateTestDir + "tetra-diploid.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header -sn NA12878 -select 'DP > 48' -trimAlternates",
|
||||||
|
1,
|
||||||
|
Arrays.asList("709782f7a07cd500d41370e6a275fcdf"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
|
||||||
|
executeTest("testTetraDiploid", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,7 @@ public class SelectVariantsParallelIntegrationTest extends WalkerTest {
|
||||||
{ // new tests on b37 using testdir VCF
|
{ // new tests on b37 using testdir VCF
|
||||||
final String testfile = privateTestDir + "NA12878.hg19.example1.vcf";
|
final String testfile = privateTestDir + "NA12878.hg19.example1.vcf";
|
||||||
final String args = "-select 'DP > 30' -V " + testfile;
|
final String args = "-select 'DP > 30' -V " + testfile;
|
||||||
new ParallelSelectTestProvider(b37KGReference, args, "64f9258e9e3024b6361abbeeeefafee9", nt);
|
new ParallelSelectTestProvider(b37KGReference, args, "51645037428729c3a9fa0e25fc2104ad", nt);
|
||||||
}
|
}
|
||||||
{ // AD and PL decoding race condition
|
{ // AD and PL decoding race condition
|
||||||
final String testfile = privateTestDir + "race_condition.vcf";
|
final String testfile = privateTestDir + "race_condition.vcf";
|
||||||
|
|
|
||||||
|
|
@ -87,7 +87,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
||||||
" --no_cmdline_in_header " +
|
" --no_cmdline_in_header " +
|
||||||
" -o %s",
|
" -o %s",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("f9f6418698f967ba7ca451ac1fb4cc8d")
|
Arrays.asList("94057d7a98c1af0a7490540ea1d9b247")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testSimpleVCFStreaming", spec);
|
executeTest("testSimpleVCFStreaming", spec);
|
||||||
|
|
|
||||||
|
|
@ -626,7 +626,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
private Set<String> IDsToRemove = null;
|
private Set<String> IDsToRemove = null;
|
||||||
private Map<String, VCFHeader> vcfRods;
|
private Map<String, VCFHeader> vcfRods;
|
||||||
|
|
||||||
private final List<Allele> diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
private final List<Allele> diploidNoCallAlleles = GATKVariantContextUtils.noCallAlleles(2);
|
||||||
|
private final Map<Integer, Integer> ploidyToNumberOfAlleles = new HashMap<Integer, Integer>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
|
|
@ -844,12 +845,24 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (considerNoCallGenotypes()) {
|
if (considerNoCallGenotypes()) {
|
||||||
final int numNoCallSamples = numNoCallGenotypes(vc);
|
final int numNoCallSamples = numNoCallGenotypes(vc);
|
||||||
final double fractionNoCallGenotypes = samples.isEmpty() ? 0.0 : ((double) numNoCallSamples) / samples.size();
|
final double fractionNoCallGenotypes = samples.isEmpty() ? 0.0 : ((double) numNoCallSamples) / samples.size();
|
||||||
if (numNoCallSamples > maxNOCALLnumber || fractionNoCallGenotypes > maxNOCALLfraction)
|
if (numNoCallSamples > maxNOCALLnumber || fractionNoCallGenotypes > maxNOCALLfraction)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize cache of PL index to a list of alleles for any ploidy
|
||||||
|
if (vc.getType() != VariantContext.Type.NO_VARIATION) {
|
||||||
|
for (final Genotype g : vc.getGenotypes()) {
|
||||||
|
if (g.getPloidy() != 0) {
|
||||||
|
if (!ploidyToNumberOfAlleles.containsKey(g.getPloidy()) || ploidyToNumberOfAlleles.get(g.getPloidy()) < vc.getNAlleles()) {
|
||||||
|
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(vc.getNAlleles() - 1, g.getPloidy());
|
||||||
|
ploidyToNumberOfAlleles.put(g.getPloidy(), vc.getNAlleles());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
|
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
|
||||||
|
|
||||||
|
|
@ -1089,7 +1102,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
for ( Genotype genotype : newGC ) {
|
for ( Genotype genotype : newGC ) {
|
||||||
//Set genotype to no call if it falls in the fraction.
|
//Set genotype to no call if it falls in the fraction.
|
||||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||||
genotypes.add(new GenotypeBuilder(genotype).alleles(diploidNoCallAlleles).noGQ().make());
|
final List<Allele> noCallAlleles = (genotype.getPloidy() == 2 ? diploidNoCallAlleles :
|
||||||
|
GATKVariantContextUtils.noCallAlleles(genotype.getPloidy()));
|
||||||
|
genotypes.add(new GenotypeBuilder(genotype).alleles(noCallAlleles).noGQ().make());
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
genotypes.add(genotype);
|
genotypes.add(genotype);
|
||||||
|
|
@ -1137,7 +1152,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
for ( final Genotype g : vc.getGenotypes() ) {
|
for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
if ( g.isCalled() && g.isFiltered() ) {
|
if ( g.isCalled() && g.isFiltered() ) {
|
||||||
haveFilteredNoCallAlleles = true;
|
haveFilteredNoCallAlleles = true;
|
||||||
genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make());
|
final List<Allele> noCallAlleles = (g.getPloidy() == 2 ? diploidNoCallAlleles :
|
||||||
|
GATKVariantContextUtils.noCallAlleles(g.getPloidy()));
|
||||||
|
genotypes.add(new GenotypeBuilder(g).alleles(noCallAlleles).make());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// increment the number called alleles and called alternate alleles
|
// increment the number called alleles and called alternate alleles
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
|
|
||||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("122340e3dc333d2b4b79c5c0c443a3fe"));
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("8e35e142fea8891f72e577e7b1526756"));
|
||||||
executeTest("Test reading and writing samtools vcf", spec1);
|
executeTest("Test reading and writing samtools vcf", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -118,7 +118,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
String testVCF = privateTestDir + "ex2.vcf";
|
String testVCF = privateTestDir + "ex2.vcf";
|
||||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("db565efb14b2fe5f00a11762751d2476"));
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("5245b967ac66b29c8e836bfa5b5b4643"));
|
||||||
executeTest("Test writing samtools WEx BCF example", spec1);
|
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -360,7 +360,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
" -o %s ";
|
" -o %s ";
|
||||||
final String name = "testBlockCompressedInput: " + testSpec.toString();
|
final String name = "testBlockCompressedInput: " + testSpec.toString();
|
||||||
|
|
||||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("ce9c0bf31ee9452ac4a12a59d5814545"));
|
final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList("1cc0ab636f33b9105d54bdaf2b80ad28"));
|
||||||
|
|
||||||
executeTest(name, spec);
|
executeTest(name, spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,14 +48,6 @@ public class GATKVariantContextUtils {
|
||||||
|
|
||||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||||
|
|
||||||
/**
|
|
||||||
* Diploid NO_CALL allele list...
|
|
||||||
*
|
|
||||||
* @deprecated you should use {@link #noCallAlleles(int)} instead. It indicates the presence of a hardcoded diploid assumption which is bad.
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public final static List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
|
||||||
|
|
||||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||||
|
|
@ -600,11 +592,11 @@ public class GATKVariantContextUtils {
|
||||||
* @param vc variant context with genotype likelihoods
|
* @param vc variant context with genotype likelihoods
|
||||||
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
|
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
|
||||||
* @param assignGenotypes assignment strategy for the (subsetted) PLs
|
* @param assignGenotypes assignment strategy for the (subsetted) PLs
|
||||||
* @return a new non-null GenotypesContext
|
* @return a new non-null GenotypesContext with subsetted alleles
|
||||||
*/
|
*/
|
||||||
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
|
public static GenotypesContext subsetAlleles(final VariantContext vc,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final GenotypeAssignmentMethod assignGenotypes) {
|
final GenotypeAssignmentMethod assignGenotypes) {
|
||||||
if ( vc == null ) throw new IllegalArgumentException("the VariantContext cannot be null");
|
if ( vc == null ) throw new IllegalArgumentException("the VariantContext cannot be null");
|
||||||
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||||
if ( allelesToUse.isEmpty() ) throw new IllegalArgumentException("must have alleles to use");
|
if ( allelesToUse.isEmpty() ) throw new IllegalArgumentException("must have alleles to use");
|
||||||
|
|
@ -615,7 +607,7 @@ public class GATKVariantContextUtils {
|
||||||
if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
|
if (vc.getGenotypes().isEmpty()) return GenotypesContext.create();
|
||||||
|
|
||||||
// find the likelihoods indexes to use from the used alternate alleles
|
// find the likelihoods indexes to use from the used alternate alleles
|
||||||
final List<Integer> likelihoodIndexesToUse = determineDiploidLikelihoodIndexesToUse(vc, allelesToUse);
|
final List<List<Integer>> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(vc, allelesToUse);
|
||||||
|
|
||||||
// find the strand allele count indexes to use from the used alternate alleles
|
// find the strand allele count indexes to use from the used alternate alleles
|
||||||
final List<Integer> sacIndexesToUse = determineSACIndexesToUse(vc, allelesToUse);
|
final List<Integer> sacIndexesToUse = determineSACIndexesToUse(vc, allelesToUse);
|
||||||
|
|
@ -625,26 +617,26 @@ public class GATKVariantContextUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the likelihood indexes to use for a selected set of diploid alleles
|
* Find the likelihood indexes to use for a selected set of alleles
|
||||||
*
|
*
|
||||||
* @param originalVC the original VariantContext
|
* @param originalVC the original VariantContext
|
||||||
* @param allelesToUse the subset of alleles to use
|
* @param allelesToUse the subset of alleles to use
|
||||||
* @return a list of PL indexes to use or null if none
|
* @return a list of PL indexes to use or null if none
|
||||||
*/
|
*/
|
||||||
private static List<Integer> determineDiploidLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
private static List<List<Integer>> determineLikelihoodIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||||
|
|
||||||
if ( originalVC == null) throw new IllegalArgumentException("the original VariantContext cannot be null");
|
if ( originalVC == null) throw new IllegalArgumentException("the original VariantContext cannot be null");
|
||||||
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||||
|
|
||||||
// the bitset representing the allele indexes we want to keep
|
// the bitset representing the allele indexes we want to keep
|
||||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
final BitSet alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
||||||
|
|
||||||
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
||||||
// then we can keep the PLs as is; otherwise, we determine which ones to keep
|
// then we can keep the PLs as is; otherwise, we determine which ones to keep
|
||||||
if ( MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length )
|
if ( alleleIndexesToUse.cardinality() == alleleIndexesToUse.size() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
return getDiploidLikelihoodIndexes(originalVC, alleleIndexesToUse);
|
return getLikelihoodIndexes(originalVC, alleleIndexesToUse);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -654,17 +646,17 @@ public class GATKVariantContextUtils {
|
||||||
* @param allelesToUse the subset of alleles to use
|
* @param allelesToUse the subset of alleles to use
|
||||||
* @return a list of SAC indexes to use or null if none
|
* @return a list of SAC indexes to use or null if none
|
||||||
*/
|
*/
|
||||||
public static List<Integer> determineSACIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
public static List<Integer> determineSACIndexesToUse(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||||
|
|
||||||
if ( originalVC == null ) throw new IllegalArgumentException("the original VC cannot be null");
|
if ( originalVC == null ) throw new IllegalArgumentException("the original VC cannot be null");
|
||||||
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||||
|
|
||||||
// the bitset representing the allele indexes we want to keep
|
// the bitset representing the allele indexes we want to keep
|
||||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
final BitSet alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
||||||
|
|
||||||
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
||||||
// then we can keep the SACs as is; otherwise, we determine which ones to keep
|
// then we can keep the SACs as is; otherwise, we determine which ones to keep
|
||||||
if (MathUtils.countOccurrences(true, alleleIndexesToUse) == alleleIndexesToUse.length)
|
if (alleleIndexesToUse.cardinality() == alleleIndexesToUse.size())
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
return getSACIndexes(alleleIndexesToUse);
|
return getSACIndexes(alleleIndexesToUse);
|
||||||
|
|
@ -675,32 +667,24 @@ public class GATKVariantContextUtils {
|
||||||
*
|
*
|
||||||
* @param originalVC the original VariantContext
|
* @param originalVC the original VariantContext
|
||||||
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
|
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
|
||||||
* @return a non-null List
|
* @return likelihoods indexes for each genotype
|
||||||
*/
|
*/
|
||||||
private static List<Integer> getDiploidLikelihoodIndexes(final VariantContext originalVC, final boolean[] alleleIndexesToUse) {
|
private static List<List<Integer>> getLikelihoodIndexes(final VariantContext originalVC, BitSet alleleIndexesToUse) {
|
||||||
|
|
||||||
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null");
|
final List<List<Integer>> likelihoodIndexesPerGenotype = new ArrayList<List<Integer>>(10);
|
||||||
if (alleleIndexesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
|
|
||||||
|
|
||||||
// All samples must be diploid
|
for (final Genotype g : originalVC.getGenotypes()) {
|
||||||
for ( final Genotype g : originalVC.getGenotypes() ){
|
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), g.getPloidy());
|
||||||
if ( g.getPloidy() != DEFAULT_PLOIDY )
|
final List<Integer> likelihoodIndexes = new ArrayList<>(30);
|
||||||
throw new ReviewedGATKException("All samples must be diploid");
|
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||||
|
// consider this entry only if all the alleles are good
|
||||||
|
if ( GenotypeLikelihoods.getAlleles(PLindex, g.getPloidy()).stream().allMatch(i -> alleleIndexesToUse.get(i)) )
|
||||||
|
likelihoodIndexes.add(PLindex);
|
||||||
|
}
|
||||||
|
likelihoodIndexesPerGenotype.add(likelihoodIndexes);
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<Integer> result = new ArrayList<>(30);
|
return likelihoodIndexesPerGenotype;
|
||||||
|
|
||||||
// numLikelihoods takes total # of alleles.
|
|
||||||
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), DEFAULT_PLOIDY);
|
|
||||||
|
|
||||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
|
||||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
|
||||||
// consider this entry only if both of the alleles are good
|
|
||||||
if ( alleleIndexesToUse[alleles.alleleIndex1] && alleleIndexesToUse[alleles.alleleIndex2] )
|
|
||||||
result.add(PLindex);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -709,15 +693,15 @@ public class GATKVariantContextUtils {
|
||||||
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
|
* @param alleleIndexesToUse the bitset representing the alleles to use (@see #getAlleleIndexBitset)
|
||||||
* @return a non-null List
|
* @return a non-null List
|
||||||
*/
|
*/
|
||||||
private static List<Integer> getSACIndexes(final boolean[] alleleIndexesToUse) {
|
private static List<Integer> getSACIndexes(final BitSet alleleIndexesToUse) {
|
||||||
|
|
||||||
if (alleleIndexesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
|
if (alleleIndexesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||||
if (alleleIndexesToUse.length == 0) throw new IllegalArgumentException("cannot have no alleles to use");
|
if (alleleIndexesToUse.isEmpty()) throw new IllegalArgumentException("cannot have no alleles to use");
|
||||||
|
|
||||||
final List<Integer> result = new ArrayList<>(2 * alleleIndexesToUse.length);
|
final List<Integer> result = new ArrayList<>(2 * alleleIndexesToUse.size());
|
||||||
|
|
||||||
for (int SACindex = 0; SACindex < alleleIndexesToUse.length; SACindex++) {
|
for (int SACindex = 0; SACindex < alleleIndexesToUse.size(); SACindex++) {
|
||||||
if (alleleIndexesToUse[SACindex]) {
|
if (alleleIndexesToUse.get(SACindex)) {
|
||||||
result.add(2 * SACindex);
|
result.add(2 * SACindex);
|
||||||
result.add(2 * SACindex + 1);
|
result.add(2 * SACindex + 1);
|
||||||
}
|
}
|
||||||
|
|
@ -734,19 +718,19 @@ public class GATKVariantContextUtils {
|
||||||
* @param allelesToUse the list of alleles to keep
|
* @param allelesToUse the list of alleles to keep
|
||||||
* @return non-null bitset
|
* @return non-null bitset
|
||||||
*/
|
*/
|
||||||
private static boolean[] getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
private static BitSet getAlleleIndexBitset(final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||||
|
|
||||||
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null");
|
if (originalVC == null) throw new IllegalArgumentException("the original VC cannot be null");
|
||||||
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
|
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||||
|
|
||||||
final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
|
final int numOriginalAltAlleles = originalVC.getNAlleles() - 1;
|
||||||
final boolean[] alleleIndexesToKeep = new boolean[numOriginalAltAlleles + 1];
|
final BitSet alleleIndexesToKeep = new BitSet(numOriginalAltAlleles + 1);
|
||||||
|
|
||||||
// the reference Allele is definitely still used
|
// the reference Allele is always used
|
||||||
alleleIndexesToKeep[0] = true;
|
alleleIndexesToKeep.set(0);
|
||||||
for (int i = 0; i < numOriginalAltAlleles; i++) {
|
for (int i = 0; i < numOriginalAltAlleles; i++) {
|
||||||
if (allelesToUse.contains(originalVC.getAlternateAllele(i)))
|
if (allelesToUse.contains(originalVC.getAlternateAllele(i)))
|
||||||
alleleIndexesToKeep[i + 1] = true;
|
alleleIndexesToKeep.set(i + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return alleleIndexesToKeep;
|
return alleleIndexesToKeep;
|
||||||
|
|
@ -813,7 +797,7 @@ public class GATKVariantContextUtils {
|
||||||
* @param originalGs the original GenotypesContext
|
* @param originalGs the original GenotypesContext
|
||||||
* @param originalVC the original VariantContext
|
* @param originalVC the original VariantContext
|
||||||
* @param allelesToUse the actual alleles to use with the new Genotypes
|
* @param allelesToUse the actual alleles to use with the new Genotypes
|
||||||
* @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse (@see #determineDiploidLikelihoodIndexesToUse())
|
* @param likelihoodIndexesToUse the indexes in the PL to use given the allelesToUse for each genotype (@see #determineLikelihoodIndexesToUse())
|
||||||
* @param sacIndexesToUse the indexes in the SAC to use given the allelesToUse (@see #determineSACIndexesToUse())
|
* @param sacIndexesToUse the indexes in the SAC to use given the allelesToUse (@see #determineSACIndexesToUse())
|
||||||
* @param assignGenotypes assignment strategy for the (subsetted) PLs
|
* @param assignGenotypes assignment strategy for the (subsetted) PLs
|
||||||
* @return a new non-null GenotypesContext
|
* @return a new non-null GenotypesContext
|
||||||
|
|
@ -821,7 +805,7 @@ public class GATKVariantContextUtils {
|
||||||
private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
|
private static GenotypesContext createGenotypesWithSubsettedLikelihoods(final GenotypesContext originalGs,
|
||||||
final VariantContext originalVC,
|
final VariantContext originalVC,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final List<Integer> likelihoodIndexesToUse,
|
final List<List<Integer>> likelihoodIndexesToUse,
|
||||||
final List<Integer> sacIndexesToUse,
|
final List<Integer> sacIndexesToUse,
|
||||||
final GenotypeAssignmentMethod assignGenotypes) {
|
final GenotypeAssignmentMethod assignGenotypes) {
|
||||||
|
|
||||||
|
|
@ -832,9 +816,6 @@ public class GATKVariantContextUtils {
|
||||||
// the new genotypes to create
|
// the new genotypes to create
|
||||||
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
|
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
|
||||||
|
|
||||||
// make sure we are seeing the expected number of likelihoods per sample
|
|
||||||
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), 2);
|
|
||||||
|
|
||||||
// the samples
|
// the samples
|
||||||
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
|
final List<String> sampleIndices = originalGs.getSampleNamesOrderedByName();
|
||||||
|
|
||||||
|
|
@ -850,6 +831,8 @@ public class GATKVariantContextUtils {
|
||||||
newLikelihoods = null;
|
newLikelihoods = null;
|
||||||
gb.noPL();
|
gb.noPL();
|
||||||
} else {
|
} else {
|
||||||
|
// make sure we are seeing the expected number of likelihoods per sample
|
||||||
|
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(originalVC.getNAlleles(), g.getPloidy());
|
||||||
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
|
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
|
||||||
if ( likelihoodIndexesToUse == null ) {
|
if ( likelihoodIndexesToUse == null ) {
|
||||||
newLikelihoods = originalLikelihoods;
|
newLikelihoods = originalLikelihoods;
|
||||||
|
|
@ -857,19 +840,20 @@ public class GATKVariantContextUtils {
|
||||||
logger.debug("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + originalVC + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods);
|
logger.debug("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + originalVC + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods);
|
||||||
newLikelihoods = null;
|
newLikelihoods = null;
|
||||||
} else {
|
} else {
|
||||||
newLikelihoods = new double[likelihoodIndexesToUse.size()];
|
newLikelihoods = new double[likelihoodIndexesToUse.get(k).size()];
|
||||||
int newIndex = 0;
|
int newIndex = 0;
|
||||||
for ( final int oldIndex : likelihoodIndexesToUse )
|
for ( final int oldIndex : likelihoodIndexesToUse.get(k) )
|
||||||
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
|
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
|
||||||
|
|
||||||
// might need to re-normalize
|
// might need to re-normalize
|
||||||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0 && likelihoodsAreUninformative(newLikelihoods) ))
|
if ( newLikelihoods == null || (originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0) == 0 && likelihoodsAreUninformative(newLikelihoods) )) {
|
||||||
gb.noPL();
|
gb.noPL();
|
||||||
else
|
} else {
|
||||||
gb.PL(newLikelihoods);
|
gb.PL(newLikelihoods);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the new strand allele counts array from the used alleles
|
// create the new strand allele counts array from the used alleles
|
||||||
|
|
@ -878,7 +862,7 @@ public class GATKVariantContextUtils {
|
||||||
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
|
gb.attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, newSACs);
|
||||||
}
|
}
|
||||||
|
|
||||||
updateGenotypeAfterSubsetting(g.getAlleles(), gb, assignGenotypes, newLikelihoods, allelesToUse);
|
updateGenotypeAfterSubsetting(g.getAlleles(), g.getPloidy(), gb, assignGenotypes, newLikelihoods, allelesToUse);
|
||||||
newGTs.add(gb.make());
|
newGTs.add(gb.make());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -893,12 +877,14 @@ public class GATKVariantContextUtils {
|
||||||
* Add the genotype call (GT) field to GenotypeBuilder using the requested algorithm assignmentMethod
|
* Add the genotype call (GT) field to GenotypeBuilder using the requested algorithm assignmentMethod
|
||||||
*
|
*
|
||||||
* @param originalGT the original genotype calls, cannot be null
|
* @param originalGT the original genotype calls, cannot be null
|
||||||
|
* @param ploidy the number of sets of chromosomes
|
||||||
* @param gb the builder where we should put our newly called alleles, cannot be null
|
* @param gb the builder where we should put our newly called alleles, cannot be null
|
||||||
* @param assignmentMethod the method to use to do the assignment, cannot be null
|
* @param assignmentMethod the method to use to do the assignment, cannot be null
|
||||||
* @param newLikelihoods a vector of likelihoods to use if the method requires PLs, should be log10 likelihoods, cannot be null
|
* @param newLikelihoods a vector of likelihoods to use if the method requires PLs, should be log10 likelihoods, cannot be null
|
||||||
* @param allelesToUse the alleles we are using for our subsetting
|
* @param allelesToUse the alleles we are using for our subsetting
|
||||||
*/
|
*/
|
||||||
public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
|
public static void updateGenotypeAfterSubsetting(final List<Allele> originalGT,
|
||||||
|
final int ploidy,
|
||||||
final GenotypeBuilder gb,
|
final GenotypeBuilder gb,
|
||||||
final GenotypeAssignmentMethod assignmentMethod,
|
final GenotypeAssignmentMethod assignmentMethod,
|
||||||
final double[] newLikelihoods,
|
final double[] newLikelihoods,
|
||||||
|
|
@ -911,11 +897,11 @@ public class GATKVariantContextUtils {
|
||||||
case DO_NOT_ASSIGN_GENOTYPES:
|
case DO_NOT_ASSIGN_GENOTYPES:
|
||||||
break;
|
break;
|
||||||
case SET_TO_NO_CALL:
|
case SET_TO_NO_CALL:
|
||||||
gb.alleles(NO_CALL_ALLELES);
|
gb.alleles(noCallAlleles(ploidy));
|
||||||
gb.noGQ();
|
gb.noGQ();
|
||||||
break;
|
break;
|
||||||
case SET_TO_NO_CALL_NO_ANNOTATIONS:
|
case SET_TO_NO_CALL_NO_ANNOTATIONS:
|
||||||
gb.alleles(NO_CALL_ALLELES);
|
gb.alleles(noCallAlleles(ploidy));
|
||||||
gb.noGQ();
|
gb.noGQ();
|
||||||
gb.noAD();
|
gb.noAD();
|
||||||
gb.noPL();
|
gb.noPL();
|
||||||
|
|
@ -924,13 +910,16 @@ public class GATKVariantContextUtils {
|
||||||
case USE_PLS_TO_ASSIGN:
|
case USE_PLS_TO_ASSIGN:
|
||||||
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
|
if ( newLikelihoods == null || likelihoodsAreUninformative(newLikelihoods) ) {
|
||||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||||
gb.alleles(NO_CALL_ALLELES);
|
gb.alleles(noCallAlleles(ploidy));
|
||||||
gb.noGQ();
|
gb.noGQ();
|
||||||
} else {
|
} else {
|
||||||
// find the genotype with maximum likelihoods
|
// find the genotype with maximum likelihoods
|
||||||
final int PLindex = MathUtils.maxElementIndex(newLikelihoods);
|
final int PLindex = MathUtils.maxElementIndex(newLikelihoods);
|
||||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
final List<Allele> alleles = new ArrayList<>();
|
||||||
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
|
for ( final Integer alleleIndex : GenotypeLikelihoods.getAlleles(PLindex, ploidy)) {
|
||||||
|
alleles.add(allelesToUse.get(alleleIndex) );
|
||||||
|
}
|
||||||
|
gb.alleles(alleles);
|
||||||
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
@ -973,7 +962,7 @@ public class GATKVariantContextUtils {
|
||||||
// create the new genotypes
|
// create the new genotypes
|
||||||
for ( final Genotype g : vc.getGenotypes() ) {
|
for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
final int gPloidy = g.getPloidy() == 0 ? ploidy : g.getPloidy();
|
final int gPloidy = g.getPloidy() == 0 ? ploidy : g.getPloidy();
|
||||||
final List<Allele> refAlleles = gPloidy == 2 ? diploidRefAlleles : Collections.nCopies(gPloidy, ref);
|
final List<Allele> refAlleles = Collections.nCopies(gPloidy, vc.getReference());
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(g.getSampleName(), refAlleles);
|
final GenotypeBuilder gb = new GenotypeBuilder(g.getSampleName(), refAlleles);
|
||||||
if ( g.hasDP() ) gb.DP(g.getDP());
|
if ( g.hasDP() ) gb.DP(g.getDP());
|
||||||
if ( g.hasGQ() ) gb.GQ(g.getGQ());
|
if ( g.hasGQ() ) gb.GQ(g.getGQ());
|
||||||
|
|
@ -990,7 +979,7 @@ public class GATKVariantContextUtils {
|
||||||
* @return genotypes context
|
* @return genotypes context
|
||||||
*/
|
*/
|
||||||
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||||
return subsetDiploidAlleles(vc, vc.getAlleles(), GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
|
return subsetAlleles(vc, vc.getAlleles(), GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -1079,7 +1068,7 @@ public class GATKVariantContextUtils {
|
||||||
genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL )
|
genotypeAssignmentMethodUsed != GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL )
|
||||||
addInfoFiledAnnotations(vc, builder, alt, keepOriginalChrCounts);
|
addInfoFiledAnnotations(vc, builder, alt, keepOriginalChrCounts);
|
||||||
|
|
||||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, genotypeAssignmentMethodUsed));
|
builder.genotypes(subsetAlleles(vc, alleles, genotypeAssignmentMethodUsed));
|
||||||
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
|
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
|
||||||
biallelics.add(trimmed);
|
biallelics.add(trimmed);
|
||||||
}
|
}
|
||||||
|
|
@ -1400,7 +1389,7 @@ public class GATKVariantContextUtils {
|
||||||
if ( numNewAlleles == numOriginalAlleles )
|
if ( numNewAlleles == numOriginalAlleles )
|
||||||
return oldGs;
|
return oldGs;
|
||||||
|
|
||||||
return fixDiploidGenotypesFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
|
return fixGenotypesFromSubsettedAlleles(oldGs, originalVC, selectedVC.getAlleles());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -1411,14 +1400,14 @@ public class GATKVariantContextUtils {
|
||||||
* @param allelesToUse the new subset of alleles to use
|
* @param allelesToUse the new subset of alleles to use
|
||||||
* @return a new non-null GenotypesContext
|
* @return a new non-null GenotypesContext
|
||||||
*/
|
*/
|
||||||
static private GenotypesContext fixDiploidGenotypesFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
static private GenotypesContext fixGenotypesFromSubsettedAlleles(final GenotypesContext originalGs, final VariantContext originalVC, final List<Allele> allelesToUse) {
|
||||||
|
|
||||||
if ( originalGs == null ) throw new IllegalArgumentException("the selected GenotypesContext cannot be null");
|
if ( originalGs == null ) throw new IllegalArgumentException("the selected GenotypesContext cannot be null");
|
||||||
if ( originalVC == null ) throw new IllegalArgumentException("the original VariantContext cannot be null");
|
if ( originalVC == null ) throw new IllegalArgumentException("the original VariantContext cannot be null");
|
||||||
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
if ( allelesToUse == null ) throw new IllegalArgumentException("the alleles to use cannot be null");
|
||||||
|
|
||||||
// find the likelihoods indexes to use from the used alternate alleles
|
// find the likelihoods indexes to use from the used alternate alleles
|
||||||
final List<Integer> likelihoodIndexesToUse = determineDiploidLikelihoodIndexesToUse(originalVC, allelesToUse);
|
final List<List<Integer>> likelihoodIndexesToUse = determineLikelihoodIndexesToUse(originalVC, allelesToUse);
|
||||||
|
|
||||||
// find the strand allele count indexes to use from the used alternate alleles
|
// find the strand allele count indexes to use from the used alternate alleles
|
||||||
final List<Integer> sacIndexesToUse = determineSACIndexesToUse(originalVC, allelesToUse);
|
final List<Integer> sacIndexesToUse = determineSACIndexesToUse(originalVC, allelesToUse);
|
||||||
|
|
@ -1441,7 +1430,7 @@ public class GATKVariantContextUtils {
|
||||||
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use list cannot be null");
|
if (allelesToUse == null) throw new IllegalArgumentException("the alleles to use list cannot be null");
|
||||||
|
|
||||||
// the bitset representing the allele indexes we want to keep
|
// the bitset representing the allele indexes we want to keep
|
||||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
final BitSet alleleIndexesToUse = getAlleleIndexBitset(originalVC, allelesToUse);
|
||||||
|
|
||||||
// the new genotypes to create
|
// the new genotypes to create
|
||||||
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
|
final GenotypesContext newGTs = GenotypesContext.create(originalGs.size());
|
||||||
|
|
@ -1452,7 +1441,7 @@ public class GATKVariantContextUtils {
|
||||||
// create the new genotypes
|
// create the new genotypes
|
||||||
for ( int k = 0; k < originalGs.size(); k++ ) {
|
for ( int k = 0; k < originalGs.size(); k++ ) {
|
||||||
final Genotype g = originalGs.get(sampleIndices.get(k));
|
final Genotype g = originalGs.get(sampleIndices.get(k));
|
||||||
newGTs.add(fixAD(g, alleleIndexesToUse, allelesToUse.size()));
|
newGTs.add(fixAD(g, alleleIndexesToUse));
|
||||||
}
|
}
|
||||||
|
|
||||||
return newGTs;
|
return newGTs;
|
||||||
|
|
@ -1463,10 +1452,9 @@ public class GATKVariantContextUtils {
|
||||||
*
|
*
|
||||||
* @param genotype the original Genotype
|
* @param genotype the original Genotype
|
||||||
* @param alleleIndexesToUse a bitset describing whether or not to keep a given index
|
* @param alleleIndexesToUse a bitset describing whether or not to keep a given index
|
||||||
* @param nAllelesToUse how many alleles we are keeping
|
|
||||||
* @return a non-null Genotype
|
* @return a non-null Genotype
|
||||||
*/
|
*/
|
||||||
private static Genotype fixAD(final Genotype genotype, final boolean[] alleleIndexesToUse, final int nAllelesToUse) {
|
private static Genotype fixAD(final Genotype genotype, final BitSet alleleIndexesToUse) {
|
||||||
// if it ain't broke don't fix it
|
// if it ain't broke don't fix it
|
||||||
if ( !genotype.hasAD() )
|
if ( !genotype.hasAD() )
|
||||||
return genotype;
|
return genotype;
|
||||||
|
|
@ -1474,18 +1462,14 @@ public class GATKVariantContextUtils {
|
||||||
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
||||||
|
|
||||||
final int[] oldAD = genotype.getAD();
|
final int[] oldAD = genotype.getAD();
|
||||||
if ( oldAD.length != alleleIndexesToUse.length ) {
|
final int[] newAD = new int[alleleIndexesToUse.cardinality()];
|
||||||
builder.noAD();
|
|
||||||
} else {
|
int currentIndex = 0;
|
||||||
final int[] newAD = new int[nAllelesToUse];
|
for ( int i = alleleIndexesToUse.nextSetBit(0); i >= 0; i = alleleIndexesToUse.nextSetBit(i+1) ) {
|
||||||
int currentIndex = 0;
|
newAD[currentIndex++] = oldAD[i];
|
||||||
for ( int i = 0; i < oldAD.length; i++ ) {
|
|
||||||
if ( alleleIndexesToUse[i] )
|
|
||||||
newAD[currentIndex++] = oldAD[i];
|
|
||||||
}
|
|
||||||
builder.AD(newAD);
|
|
||||||
}
|
}
|
||||||
return builder.make();
|
|
||||||
|
return builder.AD(newAD).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Allele determineReferenceAllele(final List<VariantContext> VCs) {
|
private static Allele determineReferenceAllele(final List<VariantContext> VCs) {
|
||||||
|
|
@ -2247,11 +2231,11 @@ public class GATKVariantContextUtils {
|
||||||
final Object[] splitOriginalField = getVAttributeValues(vc.getAttribute(infoFieldName));
|
final Object[] splitOriginalField = getVAttributeValues(vc.getAttribute(infoFieldName));
|
||||||
|
|
||||||
// subset the field
|
// subset the field
|
||||||
final boolean[] alleleIndexesToUse = getAlleleIndexBitset(vc, Arrays.asList(altAllele));
|
final BitSet alleleIndexesToUse = getAlleleIndexBitset(vc, Arrays.asList(altAllele));
|
||||||
|
|
||||||
// skip the first allele, which is the reference
|
// skip the first allele, which is the reference
|
||||||
for (int i = 1; i < alleleIndexesToUse.length; i++) {
|
for (int i = 1; i < alleleIndexesToUse.size(); i++) {
|
||||||
if (alleleIndexesToUse[i] == true)
|
if (alleleIndexesToUse.get(i))
|
||||||
return splitOriginalField[i-1];
|
return splitOriginalField[i-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -554,8 +554,8 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
// final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2));
|
// final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2));
|
||||||
//
|
//
|
||||||
// final VariantContext merged = VariantContextUtils.simpleMerge(
|
// final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||||
// Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
// Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
// VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false, false);
|
// GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
@ -835,7 +835,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(enabled = !DEBUG)
|
@Test(enabled = !DEBUG)
|
||||||
public void testRepeatAllele() {
|
public void testRepeatAllele() {
|
||||||
Allele nullR = Allele.create("A", true);
|
Allele nullR = Aref;
|
||||||
Allele nullA = Allele.create("A", false);
|
Allele nullA = Allele.create("A", false);
|
||||||
Allele atc = Allele.create("AATC", false);
|
Allele atc = Allele.create("AATC", false);
|
||||||
Allele atcatc = Allele.create("AATCATC", false);
|
Allele atcatc = Allele.create("AATCATC", false);
|
||||||
|
|
@ -1126,24 +1126,25 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
@DataProvider(name = "subsetDiploidAllelesData")
|
@DataProvider(name = "SubsetAllelesData")
|
||||||
public Object[][] makesubsetDiploidAllelesData() {
|
public Object[][] makesubsetAllelesData() {
|
||||||
List<Object[]> tests = new ArrayList<>();
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
final Allele A = Allele.create("A", true);
|
final List<Allele> AA = Arrays.asList(Aref,Aref);
|
||||||
final Allele C = Allele.create("C");
|
final List<Allele> AC = Arrays.asList(Aref,C);
|
||||||
final Allele G = Allele.create("G");
|
|
||||||
|
|
||||||
final List<Allele> AA = Arrays.asList(A,A);
|
|
||||||
final List<Allele> AC = Arrays.asList(A,C);
|
|
||||||
final List<Allele> CC = Arrays.asList(C,C);
|
final List<Allele> CC = Arrays.asList(C,C);
|
||||||
final List<Allele> AG = Arrays.asList(A,G);
|
final List<Allele> AG = Arrays.asList(Aref,G);
|
||||||
final List<Allele> CG = Arrays.asList(C,G);
|
|
||||||
final List<Allele> GG = Arrays.asList(G,G);
|
final List<Allele> GG = Arrays.asList(G,G);
|
||||||
final List<Allele> ACG = Arrays.asList(A,C,G);
|
final List<Allele> ACG = Arrays.asList(Aref,C,G);
|
||||||
|
|
||||||
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
|
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
|
||||||
|
|
||||||
|
// haploid, one alt allele
|
||||||
|
final double[] haploidRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.1});
|
||||||
|
final double[] haploidAltPL = MathUtils.normalizeFromRealSpace(new double[]{0.1, 0.9});
|
||||||
|
final double[] haploidUninformative = new double[]{0, 0};
|
||||||
|
|
||||||
|
// diploid, one alt allele
|
||||||
final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
|
final double[] homRefPL = MathUtils.normalizeFromRealSpace(new double[]{0.9, 0.09, 0.01});
|
||||||
final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
|
final double[] hetPL = MathUtils.normalizeFromRealSpace(new double[]{0.09, 0.9, 0.01});
|
||||||
final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
|
final double[] homVarPL = MathUtils.normalizeFromRealSpace(new double[]{0.01, 0.09, 0.9});
|
||||||
|
|
@ -1151,37 +1152,47 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(50).make();
|
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(50).make();
|
||||||
|
|
||||||
// make sure we don't screw up the simple case
|
// the simple case where no selection occurs
|
||||||
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
final Genotype aHaploidGT = new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(new int[]{10,2}).PL(haploidRefPL).GQ(8).make();
|
||||||
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10, 2}).PL(hetPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
final Genotype cHaploidGT = new GenotypeBuilder(base).alleles(Arrays.asList(C)).AD(new int[]{10,2}).PL(haploidAltPL).GQ(8).make();
|
||||||
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10, 2}).PL(homVarPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).GQ(8).make();
|
||||||
|
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10,2}).PL(hetPL).GQ(8).make();
|
||||||
|
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10,2}).PL(homVarPL).GQ(8).make();
|
||||||
|
|
||||||
|
// haploid
|
||||||
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aHaploidGT).make(), AC, Arrays.asList(new GenotypeBuilder(aHaploidGT).make())});
|
||||||
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(cHaploidGT).make(), AC, Arrays.asList(new GenotypeBuilder(cHaploidGT).make())});
|
||||||
|
// diploid
|
||||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), AC, Arrays.asList(new GenotypeBuilder(aaGT).make())});
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(aaGT).make(), AC, Arrays.asList(new GenotypeBuilder(aaGT).make())});
|
||||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), AC, Arrays.asList(new GenotypeBuilder(acGT).make())});
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(acGT).make(), AC, Arrays.asList(new GenotypeBuilder(acGT).make())});
|
||||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), AC, Arrays.asList(new GenotypeBuilder(ccGT).make())});
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(ccGT).make(), AC, Arrays.asList(new GenotypeBuilder(ccGT).make())});
|
||||||
|
|
||||||
// uninformative test case
|
// uninformative test cases
|
||||||
|
// diploid
|
||||||
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).PL(uninformative).GQ(0).make();
|
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).PL(uninformative).GQ(0).make();
|
||||||
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noPL().noGQ().make();
|
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.noCallAlleles(2)).noPL().noGQ().make();
|
||||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), AC, Arrays.asList(emptyGT)});
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), AC, Arrays.asList(emptyGT)});
|
||||||
|
// haploid
|
||||||
|
final Genotype haploidUninformativeGT = new GenotypeBuilder(base).alleles(Arrays.asList(C)).PL(haploidUninformative).GQ(0).make();
|
||||||
|
final Genotype haplpoidEmptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.noCallAlleles(1)).noPL().noGQ().make();
|
||||||
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(haploidUninformativeGT).make(), AC, Arrays.asList(haplpoidEmptyGT)});
|
||||||
|
|
||||||
// actually subsetting down from multiple alt values
|
// subsetting from 3 to 2 alleles
|
||||||
|
// diploid PL order is: AA, AC, CC, AG, CG, GG (00, 01, 11, 02, 12, 22)
|
||||||
final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
|
final double[] homRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50};
|
||||||
final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
|
final double[] hetRefC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50};
|
||||||
final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
|
final double[] homC3AllelesPL = new double[]{-20, -10, 0, -30, -40, -50};
|
||||||
final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
|
final double[] hetRefG3AllelesPL = new double[]{-20, -10, -30, 0, -40, -50};
|
||||||
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
|
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50};
|
||||||
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
|
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0};
|
||||||
tests.add(new Object[]{
|
tests.add(new Object[]{
|
||||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homRef3AllelesPL).make()).make(),
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homRef3AllelesPL).make()).make(),
|
||||||
AC,
|
AC,
|
||||||
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).GQ(100).make())});
|
Arrays.asList(new GenotypeBuilder(base).alleles(AA).PL(new double[]{0, -10, -20}).GQ(100).make())});
|
||||||
|
|
||||||
tests.add(new Object[]{
|
tests.add(new Object[]{
|
||||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefC3AllelesPL).make()).make(),
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefC3AllelesPL).make()).make(),
|
||||||
AC,
|
AC,
|
||||||
Arrays.asList(new GenotypeBuilder(base).alleles(AC).PL(new double[]{-10, 0, -20}).GQ(100).make())});
|
Arrays.asList(new GenotypeBuilder(base).alleles(AC).PL(new double[]{-10, 0, -20}).GQ(100).make())});
|
||||||
|
|
||||||
tests.add(new Object[]{
|
tests.add(new Object[]{
|
||||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homC3AllelesPL).make()).make(),
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(homC3AllelesPL).make()).make(),
|
||||||
AC,
|
AC,
|
||||||
|
|
@ -1190,7 +1201,6 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefG3AllelesPL).make()).make(),
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetRefG3AllelesPL).make()).make(),
|
||||||
AG,
|
AG,
|
||||||
Arrays.asList(new GenotypeBuilder(base).alleles(AG).PL(new double[]{-20, 0, -50}).GQ(200).make())});
|
Arrays.asList(new GenotypeBuilder(base).alleles(AG).PL(new double[]{-20, 0, -50}).GQ(200).make())});
|
||||||
|
|
||||||
// wow, scary -- bad output but discussed with Eric and we think this is the only thing that can be done
|
// wow, scary -- bad output but discussed with Eric and we think this is the only thing that can be done
|
||||||
tests.add(new Object[]{
|
tests.add(new Object[]{
|
||||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetCG3AllelesPL).make()).make(),
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).PL(hetCG3AllelesPL).make()).make(),
|
||||||
|
|
@ -1202,14 +1212,36 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
AG,
|
AG,
|
||||||
Arrays.asList(new GenotypeBuilder(base).alleles(GG).PL(new double[]{-20, -40, 0}).GQ(200).make())});
|
Arrays.asList(new GenotypeBuilder(base).alleles(GG).PL(new double[]{-20, -40, 0}).GQ(200).make())});
|
||||||
|
|
||||||
|
// haploid PL order is: A, C, G (0, 1, 2)
|
||||||
|
final double[] haploidRef3AllelesPL = new double[]{0, -10, -20};
|
||||||
|
final double[] haploidAltC3AllelesPL = new double[]{-10, 0, -20};
|
||||||
|
final double[] haploidAltG3AllelesPL = new double[]{-20, -10, 0};
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(haploidRef3AllelesPL).make()).make(),
|
||||||
|
AC,
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{0, -10}).GQ(100).make())});
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(C)).PL(haploidAltC3AllelesPL).make()).make(),
|
||||||
|
AC,
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(C)).PL(new double[]{-10, 0}).GQ(100).make())});
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(G)).PL(haploidAltG3AllelesPL).make()).make(),
|
||||||
|
AG,
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(G)).PL(new double[]{-20, 0}).GQ(200).make())});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "subsetDiploidAllelesData")
|
@Test(dataProvider = "SubsetAllelesData")
|
||||||
public void testsubsetDiploidAllelesData(final VariantContext inputVC,
|
public void testSubsetAlleles(final VariantContext inputVC,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final List<Genotype> expectedGenotypes) {
|
final List<Genotype> expectedGenotypes) {
|
||||||
final GenotypesContext actual = GATKVariantContextUtils.subsetDiploidAlleles(inputVC, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
|
// initialize cache of allele anyploid indices
|
||||||
|
for (final Genotype genotype : inputVC.getGenotypes()) {
|
||||||
|
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(inputVC.getNAlleles() - 1, genotype.getPloidy());
|
||||||
|
}
|
||||||
|
|
||||||
|
final GenotypesContext actual = GATKVariantContextUtils.subsetAlleles(inputVC, allelesToUse, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN);
|
||||||
|
|
||||||
Assert.assertEquals(actual.size(), expectedGenotypes.size());
|
Assert.assertEquals(actual.size(), expectedGenotypes.size());
|
||||||
for ( final Genotype expected : expectedGenotypes ) {
|
for ( final Genotype expected : expectedGenotypes ) {
|
||||||
|
|
@ -1221,71 +1253,134 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@DataProvider(name = "UpdateGenotypeAfterSubsettingData")
|
@DataProvider(name = "UpdateGenotypeAfterSubsettingData")
|
||||||
public Object[][] makeUpdateGenotypeAfterSubsettingData() {
|
public Object[][] makeUpdateGenotypeAfterSubsettingData() {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
final List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
final Allele A = Allele.create("A", true);
|
final List<Allele> AA = Arrays.asList(Aref,Aref);
|
||||||
final Allele C = Allele.create("C");
|
final List<Allele> AC = Arrays.asList(Aref,C);
|
||||||
final Allele G = Allele.create("G");
|
|
||||||
|
|
||||||
final List<Allele> AA = Arrays.asList(A,A);
|
|
||||||
final List<Allele> AC = Arrays.asList(A,C);
|
|
||||||
final List<Allele> CC = Arrays.asList(C,C);
|
final List<Allele> CC = Arrays.asList(C,C);
|
||||||
final List<Allele> AG = Arrays.asList(A,G);
|
final List<Allele> AG = Arrays.asList(Aref,G);
|
||||||
final List<Allele> CG = Arrays.asList(C,G);
|
final List<Allele> CG = Arrays.asList(C,G);
|
||||||
final List<Allele> GG = Arrays.asList(G,G);
|
final List<Allele> GG = Arrays.asList(G,G);
|
||||||
final List<Allele> ACG = Arrays.asList(A,C,G);
|
final List<Allele> AAA = Arrays.asList(Aref,Aref,Aref);
|
||||||
final List<List<Allele>> allSubsetAlleles = Arrays.asList(AC,AG,ACG);
|
final List<Allele> AAC = Arrays.asList(Aref,Aref,C);
|
||||||
|
final List<Allele> ACC = Arrays.asList(Aref,C,C);
|
||||||
|
final List<Allele> CCC = Arrays.asList(C,C,C);
|
||||||
|
final List<Allele> AAG = Arrays.asList(Aref,Aref,G);
|
||||||
|
final List<Allele> ACG = Arrays.asList(Aref,C,G);
|
||||||
|
final List<Allele> CCG = Arrays.asList(C,C,G);
|
||||||
|
final List<Allele> AGG = Arrays.asList(Aref,G,G);
|
||||||
|
final List<Allele> CGG = Arrays.asList(C,G,G);
|
||||||
|
final List<Allele> GGG = Arrays.asList(G,G,G);
|
||||||
|
final List<List<Allele>> allDiploidSubsetAlleles = Arrays.asList(AC,AG,ACG);
|
||||||
|
final List<List<Allele>> allTriploidSubsetAlleles = Arrays.asList(AAA,AAC,ACC,CCC,AAG,ACG,CCG,AGG,CGG,GGG);
|
||||||
|
|
||||||
|
// for P=1, the index of the genotype a is a
|
||||||
|
final double[] aRefPL = new double[]{0.9, 0.09, 0.01};
|
||||||
|
final double[] cPL = new double[]{0.09, 0.9, 0.01};
|
||||||
|
final double[] gPL = new double[]{0.01, 0.09, 0.9};
|
||||||
|
final List<double[]> allHaploidPLs = Arrays.asList(aRefPL, cPL, gPL);
|
||||||
|
final List<List<Allele>> allHaploidSubsetAlleles = Arrays.asList(Arrays.asList(Aref), Arrays.asList(G));
|
||||||
|
|
||||||
|
// for P=2 and N=1, the ordering is 00,01,11
|
||||||
final double[] homRefPL = new double[]{0.9, 0.09, 0.01};
|
final double[] homRefPL = new double[]{0.9, 0.09, 0.01};
|
||||||
final double[] hetPL = new double[]{0.09, 0.9, 0.01};
|
final double[] hetPL = new double[]{0.09, 0.9, 0.01};
|
||||||
final double[] homVarPL = new double[]{0.01, 0.09, 0.9};
|
final double[] homVarPL = new double[]{0.01, 0.09, 0.9};
|
||||||
final double[] uninformative = new double[]{0.33, 0.33, 0.33};
|
final double[] uninformative = new double[]{0.33, 0.33, 0.33};
|
||||||
final List<double[]> allPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformative);
|
final List<double[]> allDiploidPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformative);
|
||||||
|
|
||||||
for ( final List<Allele> alleles : allSubsetAlleles ) {
|
// for P=3 and N=2, the ordering is 000, 001, 011, 111, 002, 012, 112, 022, 122, 222
|
||||||
for ( final double[] pls : allPLs ) {
|
final double[] aaaPL = new double[]{0.9, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, pls, AA, alleles, GATKVariantContextUtils.NO_CALL_ALLELES});
|
final double[] aacPL = new double[]{0.01, 0.9, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
|
||||||
}
|
final double[] accPL = new double[]{0.01, 0.02, 0.9, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
|
||||||
|
final double[] cccPL = new double[]{0.01, 0.02, 0.03, 0.9, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09};
|
||||||
|
final double[] aagPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.9, 0.05, 0.06, 0.07, 0.08, 0.09};
|
||||||
|
final double[] acgPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.9, 0.06, 0.07, 0.08, 0.09};
|
||||||
|
final double[] ccgPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.9, 0.07, 0.08, 0.09};
|
||||||
|
final double[] aggPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.9, 0.08, 0.09};
|
||||||
|
final double[] cggPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.9, 0.09};
|
||||||
|
final double[] gggPL = new double[]{0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.9};
|
||||||
|
final double[] uninformativeTriploid = new double[]{0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
|
||||||
|
final List<double[]> allTriploidPLs = Arrays.asList(homRefPL, hetPL, homVarPL, uninformativeTriploid);
|
||||||
|
|
||||||
|
|
||||||
|
for ( final List<Allele> alleles : allHaploidSubsetAlleles ) {
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, allHaploidPLs.get(0), Arrays.asList(Aref), alleles, GATKVariantContextUtils.noCallAlleles(1)});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for ( final List<Allele> alleles : allDiploidSubsetAlleles ) {
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, allDiploidPLs.get(0), AA, alleles, GATKVariantContextUtils.noCallAlleles(2)});
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( final List<Allele> alleles : allTriploidSubsetAlleles ) {
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.SET_TO_NO_CALL, allTriploidPLs.get(0), AAA, alleles, GATKVariantContextUtils.noCallAlleles(3)});
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Allele> originalHaploidGT = Arrays.asList(Aref, C, G );
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aRefPL, originalHaploidGT, originalHaploidGT, Arrays.asList(Aref)});
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, cPL, originalHaploidGT, originalHaploidGT, Arrays.asList(C)});
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, gPL, originalHaploidGT, originalHaploidGT, Arrays.asList(G)});
|
||||||
|
|
||||||
for ( final List<Allele> originalGT : Arrays.asList(AA, AC, CC, AG, CG, GG) ) {
|
for ( final List<Allele> originalGT : Arrays.asList(AA, AC, CC, AG, CG, GG) ) {
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homRefPL, originalGT, AC, AA});
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homRefPL, originalGT, AC, AA});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, hetPL, originalGT, AC, AC});
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, hetPL, originalGT, AC, AC});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homVarPL, originalGT, AC, CC});
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, homVarPL, originalGT, AC, CC});
|
||||||
// tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, uninformative, AA, AC, GATKVariantContextUtils.NO_CALL_ALLELES});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( final double[] pls : allPLs ) {
|
for ( final List<Allele> originalGT : allTriploidSubsetAlleles) {
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, AC, AA});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aaaPL, originalGT, ACG, AAA});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, AC, AC});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aacPL, originalGT, ACG, AAC});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, AC, CC});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, accPL, originalGT, ACG, ACC});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, AC, AC});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, cccPL, originalGT, ACG, CCC});
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aagPL, originalGT, ACG, AAG});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, AG, AA});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, acgPL, originalGT, ACG, ACG});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, AG, AA});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, ccgPL, originalGT, ACG, CCG});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, AG, AA});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, aggPL, originalGT, ACG, AGG});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, AG, AG});
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, cggPL, originalGT, ACG, CCG});
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, gggPL, originalGT, ACG, GGG});
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AA, ACG, AA});
|
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AC, ACG, AC});
|
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CC, ACG, CC});
|
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, AG, ACG, AG});
|
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, CG, ACG, CG});
|
|
||||||
tests.add(new Object[]{GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, pls, GG, ACG, GG});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allHaploidPLs.get(0), Arrays.asList(Aref, C, G), Arrays.asList(Aref), Arrays.asList(Aref)});
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allHaploidPLs.get(0), Arrays.asList(Aref, C, G), Arrays.asList(C), Arrays.asList(C)});
|
||||||
|
tests.add(new Object[]{1, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allHaploidPLs.get(0), Arrays.asList(Aref, C, G), Arrays.asList(G), Arrays.asList(G)});
|
||||||
|
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AA, AC, AA});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AC, AC, AC});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CC, AC, CC});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CG, AC, AC});
|
||||||
|
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AA, AG, AA});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AC, AG, AA});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CC, AG, AA});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CG, AG, AG});
|
||||||
|
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AA, ACG, AA});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AC, ACG, AC});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CC, ACG, CC});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), AG, ACG, AG});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), CG, ACG, CG});
|
||||||
|
tests.add(new Object[]{2, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allDiploidPLs.get(0), GG, ACG, GG});
|
||||||
|
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), AAA, AAC, AAA});
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), ACC, AAC, ACC});
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), AAC, AAC, AAC});
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), AAC, ACG, AAC});
|
||||||
|
tests.add(new Object[]{3, GATKVariantContextUtils.GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, allTriploidPLs.get(0), GGG, AAA, AAA});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = !DEBUG, dataProvider = "UpdateGenotypeAfterSubsettingData")
|
@Test(enabled = !DEBUG, dataProvider = "UpdateGenotypeAfterSubsettingData")
|
||||||
public void testUpdateGenotypeAfterSubsetting(final GATKVariantContextUtils.GenotypeAssignmentMethod mode,
|
public void testUpdateGenotypeAfterSubsetting(final int ploidy,
|
||||||
|
final GATKVariantContextUtils.GenotypeAssignmentMethod mode,
|
||||||
final double[] likelihoods,
|
final double[] likelihoods,
|
||||||
final List<Allele> originalGT,
|
final List<Allele> originalGT,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final List<Allele> expectedAlleles) {
|
final List<Allele> expectedAlleles) {
|
||||||
|
final int numAltAlleles = originalGT.size() > 1 ? originalGT.size() - 1 : 1;
|
||||||
|
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(numAltAlleles, ploidy);
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder("test");
|
final GenotypeBuilder gb = new GenotypeBuilder("test");
|
||||||
final double[] log10Likelhoods = MathUtils.normalizeFromLog10(likelihoods, true, false);
|
final double[] log10Likelhoods = MathUtils.normalizeFromLog10(likelihoods, true, false);
|
||||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(originalGT, gb, mode, log10Likelhoods, allelesToUse);
|
GATKVariantContextUtils.updateGenotypeAfterSubsetting(originalGT, ploidy, gb, mode, log10Likelhoods, allelesToUse);
|
||||||
final Genotype g = gb.make();
|
final Genotype g = gb.make();
|
||||||
Assert.assertEquals(new HashSet<>(g.getAlleles()), new HashSet<>(expectedAlleles));
|
Assert.assertEquals(new HashSet<>(g.getAlleles()), new HashSet<>(expectedAlleles));
|
||||||
}
|
}
|
||||||
|
|
@ -1331,17 +1426,11 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
public Object[][] makeUpdatePLsSACsAndADData() {
|
public Object[][] makeUpdatePLsSACsAndADData() {
|
||||||
List<Object[]> tests = new ArrayList<>();
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
final Allele A = Allele.create("A", true);
|
final List<Allele> AA = Arrays.asList(Aref,Aref);
|
||||||
final Allele C = Allele.create("C");
|
final List<Allele> AC = Arrays.asList(Aref,C);
|
||||||
final Allele G = Allele.create("G");
|
|
||||||
|
|
||||||
final List<Allele> AA = Arrays.asList(A,A);
|
|
||||||
final List<Allele> AC = Arrays.asList(A,C);
|
|
||||||
final List<Allele> CC = Arrays.asList(C,C);
|
final List<Allele> CC = Arrays.asList(C,C);
|
||||||
final List<Allele> AG = Arrays.asList(A,G);
|
final List<Allele> AG = Arrays.asList(Aref,G);
|
||||||
final List<Allele> CG = Arrays.asList(C,G);
|
final List<Allele> ACG = Arrays.asList(Aref,C,G);
|
||||||
final List<Allele> GG = Arrays.asList(G,G);
|
|
||||||
final List<Allele> ACG = Arrays.asList(A,C,G);
|
|
||||||
|
|
||||||
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
|
final VariantContext vcBase = new VariantContextBuilder("test", "20", 10, 10, AC).make();
|
||||||
|
|
||||||
|
|
@ -1352,7 +1441,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
|
final Genotype base = new GenotypeBuilder("NA12878").DP(10).GQ(100).make();
|
||||||
|
|
||||||
// make sure we don't screw up the simple case where no selection happens
|
// make sure we don't screw up the simple case where no selection occurs
|
||||||
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
final Genotype aaGT = new GenotypeBuilder(base).alleles(AA).AD(new int[]{10,2}).PL(homRefPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
||||||
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10, 2}).PL(hetPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
final Genotype acGT = new GenotypeBuilder(base).alleles(AC).AD(new int[]{10, 2}).PL(hetPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
||||||
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10, 2}).PL(homVarPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
final Genotype ccGT = new GenotypeBuilder(base).alleles(CC).AD(new int[]{10, 2}).PL(homVarPL).attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).GQ(8).make();
|
||||||
|
|
@ -1364,7 +1453,7 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
// uninformative test cases
|
// uninformative test cases
|
||||||
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
|
final Genotype uninformativeGT = new GenotypeBuilder(base).alleles(CC).noAD().PL(uninformative).GQ(0).make();
|
||||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(uninformativeGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(uninformativeGT)});
|
||||||
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.NO_CALL_ALLELES).noAD().noPL().noGQ().make();
|
final Genotype emptyGT = new GenotypeBuilder(base).alleles(GATKVariantContextUtils.noCallAlleles(2)).noAD().noPL().noGQ().make();
|
||||||
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
|
tests.add(new Object[]{new VariantContextBuilder(vcBase).genotypes(emptyGT).make(), new VariantContextBuilder(vcBase).alleles(AC).make(), Arrays.asList(emptyGT)});
|
||||||
|
|
||||||
// actually subsetting down from multiple alt values
|
// actually subsetting down from multiple alt values
|
||||||
|
|
@ -1375,6 +1464,14 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
|
final double[] hetCG3AllelesPL = new double[]{-20, -10, -30, -40, 0, -50}; // AA, AC, CC, AG, CG, GG
|
||||||
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
|
final double[] homG3AllelesPL = new double[]{-20, -10, -30, -40, -50, 0}; // AA, AC, CC, AG, CG, GG
|
||||||
|
|
||||||
|
final double[] haploidRef3AllelesPL = new double[]{0, -10, -20};
|
||||||
|
final double[] haploidAltC3AllelesPL = new double[]{-10, 0, -20};
|
||||||
|
final double[] haploidAltG3AllelesPL = new double[]{-20, -10, 0};
|
||||||
|
|
||||||
|
// for P=3 and N=2, the ordering is 000, 001, 011, 111, 002, 012, 112, 022, 122, 222
|
||||||
|
final double[] triploidRef3AllelesPL = new double[]{0, -10, -20, -30, -40, -50, -60, -70, -80, -90};
|
||||||
|
final double[] tripoidAltC3AllelesPL = new double[]{-10, 0, -20, -30, -40, -50, -60, -70, -80, -90};
|
||||||
|
|
||||||
final int[] homRef3AllelesAD = new int[]{20, 0, 1};
|
final int[] homRef3AllelesAD = new int[]{20, 0, 1};
|
||||||
final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
|
final int[] hetRefC3AllelesAD = new int[]{10, 10, 1};
|
||||||
final int[] homC3AllelesAD = new int[]{0, 20, 1};
|
final int[] homC3AllelesAD = new int[]{0, 20, 1};
|
||||||
|
|
@ -1389,6 +1486,44 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
final int[] hetCG3AllelesSAC = new int[]{0, 0, 12, 12, 11, 11}; // AA, AC, CC, AG, CG, GG
|
final int[] hetCG3AllelesSAC = new int[]{0, 0, 12, 12, 11, 11}; // AA, AC, CC, AG, CG, GG
|
||||||
final int[] homG3AllelesSAC = new int[]{0, 0, 1, 1, 21, 21}; // AA, AC, CC, AG, CG, GG
|
final int[] homG3AllelesSAC = new int[]{0, 0, 1, 1, 21, 21}; // AA, AC, CC, AG, CG, GG
|
||||||
|
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(homRef3AllelesAD).PL(haploidRef3AllelesPL).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
|
||||||
|
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{0, -10}).AD(new int[]{20, 0}).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{20, 19, 0, 1}).GQ(100).make())});
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(hetRefC3AllelesAD).PL(haploidAltC3AllelesPL).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetRefC3AllelesSAC).make()).make(),
|
||||||
|
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{-10, 0}).AD(new int[]{10, 10}).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{10, 9, 10, 9}).GQ(100).make())});
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).AD(homC3AllelesAD).PL(haploidAltG3AllelesPL).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(),
|
||||||
|
new VariantContextBuilder(vcBase).alleles(AG).make(),
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref)).PL(new double[]{-20, 0}).AD(new int[]{0, 1}).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(100).make())});
|
||||||
|
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, Aref)).AD(homRef3AllelesAD).PL(triploidRef3AllelesPL).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
|
||||||
|
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, Aref)).PL(new double[]{0, -10, -20, -30}).AD(new int[]{20, 0}).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{20, 19, 0, 1}).GQ(100).make())});
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, C)).AD(hetRefC3AllelesAD).PL(tripoidAltC3AllelesPL).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, hetRefC3AllelesSAC).make()).make(),
|
||||||
|
new VariantContextBuilder(vcBase).alleles(AC).make(),
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, C)).PL(new double[]{-10, 0, -20, -30}).AD(new int[]{10, 10}).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{10, 9, 10, 9}).GQ(100).make())});
|
||||||
|
tests.add(new Object[]{
|
||||||
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, G)).AD(homRef3AllelesAD).PL(triploidRef3AllelesPL).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homC3AllelesSAC).make()).make(),
|
||||||
|
new VariantContextBuilder(vcBase).alleles(AG).make(),
|
||||||
|
Arrays.asList(new GenotypeBuilder(base).alleles(Arrays.asList(Aref, Aref, G)).PL(new double[]{0, -40, -70, -90}).AD(new int[]{20, 1}).
|
||||||
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{0, 0, 1, 1}).GQ(100).make())});
|
||||||
|
|
||||||
tests.add(new Object[]{
|
tests.add(new Object[]{
|
||||||
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).
|
new VariantContextBuilder(vcBase).alleles(ACG).genotypes(new GenotypeBuilder(base).alleles(AA).AD(homRef3AllelesAD).PL(homRef3AllelesPL).
|
||||||
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
|
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, homRef3AllelesSAC).make()).make(),
|
||||||
|
|
@ -1433,6 +1568,11 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||||
public void testUpdatePLsAndADData(final VariantContext originalVC,
|
public void testUpdatePLsAndADData(final VariantContext originalVC,
|
||||||
final VariantContext selectedVC,
|
final VariantContext selectedVC,
|
||||||
final List<Genotype> expectedGenotypes) {
|
final List<Genotype> expectedGenotypes) {
|
||||||
|
// initialize cache of allele anyploid indices
|
||||||
|
for (final Genotype genotype : originalVC.getGenotypes()) {
|
||||||
|
GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(originalVC.getNAlleles() - 1, genotype.getPloidy());
|
||||||
|
}
|
||||||
|
|
||||||
final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
|
final VariantContext selectedVCwithGTs = new VariantContextBuilder(selectedVC).genotypes(originalVC.getGenotypes()).make();
|
||||||
final GenotypesContext actual = GATKVariantContextUtils.updatePLsSACsAD(selectedVCwithGTs, originalVC);
|
final GenotypesContext actual = GATKVariantContextUtils.updatePLsSACsAD(selectedVCwithGTs, originalVC);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue