Merge pull request #1020 from broadinstitute/eb_handle_multiple_spanning_dels
Handle cases where a given sample has multiple spanning deletions.
This commit is contained in:
commit
29ebfc32c3
|
|
@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||||
|
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
import htsjdk.variant.vcf.VCFConstants;
|
import htsjdk.variant.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculator;
|
||||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators;
|
||||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||||
import org.broadinstitute.gatk.utils.MathUtils;
|
import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
|
|
@ -431,13 +432,72 @@ public class ReferenceConfidenceVariantContextMerger {
|
||||||
|
|
||||||
// create the index mapping, using the <NON-REF> allele whenever such a mapping doesn't exist
|
// create the index mapping, using the <NON-REF> allele whenever such a mapping doesn't exist
|
||||||
for ( int i = 1; i < targetAlleles.size(); i++ ) {
|
for ( int i = 1; i < targetAlleles.size(); i++ ) {
|
||||||
final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAlleles.get(i));
|
final Allele targetAllele = targetAlleles.get(i);
|
||||||
|
|
||||||
|
// if there’s more than 1 DEL allele then we need to use the best one
|
||||||
|
if ( targetAllele == Allele.SPAN_DEL && g.hasPL() ) {
|
||||||
|
final int occurrences = Collections.frequency(remappedAlleles, Allele.SPAN_DEL);
|
||||||
|
if ( occurrences > 1 ) {
|
||||||
|
final int indexOfBestDel = indexOfBestDel(remappedAlleles, g.getPL(), g.getPloidy());
|
||||||
|
indexMapping[i] = ( indexOfBestDel == -1 ? indexOfNonRef : indexOfBestDel );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final int indexOfRemappedAllele = remappedAlleles.indexOf(targetAllele);
|
||||||
indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfNonRef : indexOfRemappedAllele;
|
indexMapping[i] = indexOfRemappedAllele == -1 ? indexOfNonRef : indexOfRemappedAllele;
|
||||||
}
|
}
|
||||||
|
|
||||||
return indexMapping;
|
return indexMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the index of the best spanning deletion allele based on AD counts
|
||||||
|
*
|
||||||
|
* @param alleles the list of alleles
|
||||||
|
* @param PLs the list of corresponding PL values
|
||||||
|
* @param ploidy the ploidy of the sample
|
||||||
|
* @return the best index or -1 if not found
|
||||||
|
*/
|
||||||
|
private static int indexOfBestDel(final List<Allele> alleles, final int[] PLs, final int ploidy) {
|
||||||
|
int bestIndex = -1;
|
||||||
|
int bestPL = Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
for ( int i = 0; i < alleles.size(); i++ ) {
|
||||||
|
if ( alleles.get(i) == Allele.SPAN_DEL ) {
|
||||||
|
final int homAltIndex = findHomIndex(i, ploidy, alleles.size());
|
||||||
|
final int PL = PLs[homAltIndex];
|
||||||
|
if ( PL < bestPL ) {
|
||||||
|
bestIndex = i;
|
||||||
|
bestPL = PL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return bestIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the index of the PL that represents the homozygous genotype of the given i'th allele
|
||||||
|
*
|
||||||
|
* @param i the index of the allele with the list of alleles
|
||||||
|
* @param ploidy the ploidy of the sample
|
||||||
|
* @param numAlleles the total number of alleles
|
||||||
|
* @return the hom index
|
||||||
|
*/
|
||||||
|
private static int findHomIndex(final int i, final int ploidy, final int numAlleles) {
|
||||||
|
// some quick optimizations for the common case
|
||||||
|
if ( ploidy == 2 )
|
||||||
|
return i + (i * (i + 1) / 2); // this is straight from the VCF spec on PLs
|
||||||
|
if ( ploidy == 1 )
|
||||||
|
return i;
|
||||||
|
|
||||||
|
final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, numAlleles);
|
||||||
|
final int[] alleleIndexes = new int[ploidy];
|
||||||
|
Arrays.fill(alleleIndexes, i);
|
||||||
|
return calculator.allelesToIndex(alleleIndexes);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current
|
* Generates a new AD array by adding zeros for missing alleles given the set of indexes of the Genotype's current
|
||||||
* alleles from the original AD.
|
* alleles from the original AD.
|
||||||
|
|
|
||||||
|
|
@ -222,6 +222,39 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
||||||
executeTest("testSpanningDeletions", spec);
|
executeTest("testSpanningDeletions", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleSpanningDeletionsForOneSample() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||||
|
" -V " + privateTestDir + "spanningDel.many.g.vcf",
|
||||||
|
1,
|
||||||
|
Arrays.asList("5c88e10211def13ba847c29d0fe9e191"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMultipleSpanningDeletionsForOneSample", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleSpanningDeletionsForOneSampleHaploid() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||||
|
" -V " + privateTestDir + "spanningDel.many.haploid.g.vcf",
|
||||||
|
1,
|
||||||
|
Arrays.asList("76fc5f6b949ac0b893061828af800bf8"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMultipleSpanningDeletionsForOneSampleHaploid", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleSpanningDeletionsForOneSampleTetraploid() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||||
|
" -V " + privateTestDir + "spanningDel.many.tetraploid.g.vcf",
|
||||||
|
1,
|
||||||
|
Arrays.asList("0ec79471550ec5e30540f68cb0651b14"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMultipleSpanningDeletionsForOneSampleTetraploid", spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWrongReferenceBaseBugFix() throws Exception {
|
public void testWrongReferenceBaseBugFix() throws Exception {
|
||||||
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"
|
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue