diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java index fcb3dca0a..2780c3806 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/ReferenceConfidenceVariantContextMerger.java @@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.variantutils; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; +import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculator; import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.MathUtils; @@ -431,13 +432,72 @@ public class ReferenceConfidenceVariantContextMerger { // create the index mapping, using the allele whenever such a mapping doesn't exist for ( int i = 1; i < targetAlleles.size(); i++ ) { - final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i)); + final Allele targetAllele = targetAlleles.get(i); + + // if there’s more than 1 DEL allele then we need to use the best one + if ( targetAllele == Allele.SPAN_DEL && g.hasPL() ) { + final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL); + if ( occurrences > 1 ) { + final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy()); + indexMapping[i] = ( indexOfBestDel == -1 ? indexOfNonRef : indexOfBestDel ); + continue; + } + } + + final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAllele); indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfNonRef : indexOfRemappedAllele; } return indexMapping; } + /** + * Returns the index of the best spanning deletion allele based on AD counts + * + * @param alleles the list of alleles + * @param PLs the list of corresponding PL values + * @param ploidy the ploidy of the sample + * @return the best index or -1 if not found + */ + private static int indexOfBestDel(final List alleles, final int[] PLs, final int ploidy) { + int bestIndex = -1; + int bestPL = Integer.MAX_VALUE; + + for ( int i = 0; i < alleles.size(); i++ ) { + if ( alleles.get(i) == Allele.SPAN_DEL ) { + final int homAltIndex = findHomIndex(i, ploidy, alleles.size()); + final int PL = PLs[homAltIndex]; + if ( PL < bestPL ) { + bestIndex = i; + bestPL = PL; + } + } + } + + return bestIndex; + } + + /** + * Returns the index of the PL that represents the homozygous genotype of the given i'th allele + * + * @param i the index of the allele with the list of alleles + * @param ploidy the ploidy of the sample + * @param numAlleles the total number of alleles + * @return the hom index + */ + private static int findHomIndex(final int i, final int ploidy, final int numAlleles) { + // some quick optimizations for the common case + if ( ploidy == 2 ) + return i + (i * (i + 1) / 2); // this is straight from the VCF spec on PLs + if ( ploidy == 1 ) + return i; + + final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, numAlleles); + final int[] alleleIndexes = new int[ploidy]; + Arrays.fill(alleleIndexes, i); + return calculator.allelesToIndex(alleleIndexes); + } + /** * Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current * alleles from the original AD. diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java index c5a1c8c84..8ac40aa74 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -222,6 +222,39 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { executeTest("testSpanningDeletions", spec); } + @Test + public void testMultipleSpanningDeletionsForOneSample() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + + " -V " + privateTestDir + "spanningDel.many.g.vcf", + 1, + Arrays.asList("5c88e10211def13ba847c29d0fe9e191")); + spec.disableShadowBCF(); + executeTest("testMultipleSpanningDeletionsForOneSample", spec); + } + + @Test + public void testMultipleSpanningDeletionsForOneSampleHaploid() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + + " -V " + privateTestDir + "spanningDel.many.haploid.g.vcf", + 1, + Arrays.asList("76fc5f6b949ac0b893061828af800bf8")); + spec.disableShadowBCF(); + executeTest("testMultipleSpanningDeletionsForOneSampleHaploid", spec); + } + + @Test + public void testMultipleSpanningDeletionsForOneSampleTetraploid() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference + + " -V " + privateTestDir + "spanningDel.many.tetraploid.g.vcf", + 1, + Arrays.asList("0ec79471550ec5e30540f68cb0651b14")); + spec.disableShadowBCF(); + executeTest("testMultipleSpanningDeletionsForOneSampleTetraploid", spec); + } + @Test public void testWrongReferenceBaseBugFix() throws Exception { final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"