Merge pull request #1020 from broadinstitute/eb_handle_multiple_spanning_dels

Handle cases where a given sample has multiple spanning deletions.
This commit is contained in:
Eric Banks 2015-06-16 14:20:46 -04:00
commit 29ebfc32c3
2 changed files with 94 additions and 1 deletions

View File

@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.variantutils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculator;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.MathUtils;
@ -431,13 +432,72 @@ public class ReferenceConfidenceVariantContextMerger {
// create the index mapping, using the <NON-REF> allele whenever such a mapping doesn't exist
for ( int i = 1; i < targetAlleles.size(); i++ ) {
final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i));
final Allele targetAllele = targetAlleles.get(i);
// if theres more than 1 DEL allele then we need to use the best one
if ( targetAllele == Allele.SPAN_DEL && g.hasPL() ) {
final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL);
if ( occurrences > 1 ) {
final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy());
indexMapping[i] = ( indexOfBestDel == -1 ? indexOfNonRef : indexOfBestDel );
continue;
}
}
final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAllele);
indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfNonRef : indexOfRemappedAllele;
}
return indexMapping;
}
/**
* Returns the index of the best spanning deletion allele based on AD counts
*
* @param alleles the list of alleles
* @param PLs the list of corresponding PL values
* @param ploidy the ploidy of the sample
* @return the best index or -1 if not found
*/
private static int indexOfBestDel(final List<Allele> alleles, final int[] PLs, final int ploidy) {
int bestIndex = -1;
int bestPL = Integer.MAX_VALUE;
for ( int i = 0; i < alleles.size(); i++ ) {
if ( alleles.get(i) == Allele.SPAN_DEL ) {
final int homAltIndex = findHomIndex(i, ploidy, alleles.size());
final int PL = PLs[homAltIndex];
if ( PL < bestPL ) {
bestIndex = i;
bestPL = PL;
}
}
}
return bestIndex;
}
/**
* Returns the index of the PL that represents the homozygous genotype of the given i'th allele
*
* @param i the index of the allele with the list of alleles
* @param ploidy the ploidy of the sample
* @param numAlleles the total number of alleles
* @return the hom index
*/
private static int findHomIndex(final int i, final int ploidy, final int numAlleles) {
// some quick optimizations for the common case
if ( ploidy == 2 )
return i + (i * (i + 1) / 2); // this is straight from the VCF spec on PLs
if ( ploidy == 1 )
return i;
final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, numAlleles);
final int[] alleleIndexes = new int[ploidy];
Arrays.fill(alleleIndexes, i);
return calculator.allelesToIndex(alleleIndexes);
}
/**
* Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current
* alleles from the original AD.

View File

@ -222,6 +222,39 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
executeTest("testSpanningDeletions", spec);
}
@Test
public void testMultipleSpanningDeletionsForOneSample() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.many.g.vcf",
1,
Arrays.asList("5c88e10211def13ba847c29d0fe9e191"));
spec.disableShadowBCF();
executeTest("testMultipleSpanningDeletionsForOneSample", spec);
}
@Test
public void testMultipleSpanningDeletionsForOneSampleHaploid() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.many.haploid.g.vcf",
1,
Arrays.asList("76fc5f6b949ac0b893061828af800bf8"));
spec.disableShadowBCF();
executeTest("testMultipleSpanningDeletionsForOneSampleHaploid", spec);
}
@Test
public void testMultipleSpanningDeletionsForOneSampleTetraploid() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.many.tetraploid.g.vcf",
1,
Arrays.asList("0ec79471550ec5e30540f68cb0651b14"));
spec.disableShadowBCF();
executeTest("testMultipleSpanningDeletionsForOneSampleTetraploid", spec);
}
@Test
public void testWrongReferenceBaseBugFix() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"