Bugfix for bad AD values in UG/HC
-- In the case where we have multiple potential alternative alleles *and* we weren't calling all of them (so that n potential values < n called) we could end up trimming the alleles down which would result in the mismatch between the PerReadAlleleLikelihoodMap alleles and the VariantContext trimmed alleles. -- Fixed by doing two things (1) moving the trimming code after the annotation call and (2) updating AD annotation to check that the alleles in the VariantContext and the PerReadAlleleLikelihoodMap are concordant, which will stop us from degenerating in the future. -- delivers [#50897077]
This commit is contained in:
parent
c8845a2b63
commit
34bdf20132
|
|
@ -66,10 +66,7 @@ import org.broadinstitute.variant.variantcontext.Genotype;
|
|||
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -135,20 +132,24 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
}
|
||||
|
||||
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) {
|
||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
|
||||
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
|
||||
|
||||
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
|
||||
if ( ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
|
||||
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet());
|
||||
|
||||
final HashMap<Allele, Integer> alleleCounts = new HashMap<>();
|
||||
for ( final Allele allele : vc.getAlleles() ) { alleleCounts.put(allele, 0); }
|
||||
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
alleleCounts.put(allele, 0);
|
||||
}
|
||||
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
|
||||
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
|
||||
if (! a.isInformative() ) continue; // read is non-informative
|
||||
final GATKSAMRecord read = el.getKey();
|
||||
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
|
||||
if (! a.isInformative() )
|
||||
continue; // read is non-informative
|
||||
if (!vc.getAlleles().contains(a.getMostLikelyAllele()))
|
||||
continue; // sanity check - shouldn't be needed
|
||||
alleleCounts.put(a.getMostLikelyAllele(), alleleCounts.get(a.getMostLikelyAllele()) + (read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1));
|
||||
final int prevCount = alleleCounts.get(a.getMostLikelyAllele());
|
||||
final int incCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
|
||||
alleleCounts.put(a.getMostLikelyAllele(), prevCount + incCount);
|
||||
}
|
||||
|
||||
final int[] counts = new int[alleleCounts.size()];
|
||||
counts[0] = alleleCounts.get(vc.getReference());
|
||||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
|
||||
|
|
|
|||
|
|
@ -543,11 +543,6 @@ public class UnifiedGenotyperEngine {
|
|||
builder.attributes(attributes);
|
||||
VariantContext vcCall = builder.make();
|
||||
|
||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
|
||||
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||
|
|
@ -556,6 +551,11 @@ public class UnifiedGenotyperEngine {
|
|||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
|
||||
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -204,13 +204,12 @@ public class GenotypingEngine {
|
|||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||
|
||||
VariantContext annotatedCall = call;
|
||||
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
||||
|
||||
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
}
|
||||
|
||||
annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, annotatedCall);
|
||||
|
||||
// maintain the set of all called haplotypes
|
||||
for ( final Allele calledAllele : call.getAlleles() )
|
||||
calledHaplotypes.addAll(alleleMapper.get(calledAllele));
|
||||
|
|
|
|||
|
|
@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "66a5a3eb657fac5c621bc0c228ea9caf");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "353c97bfb05a939b3838dc8eee50326b");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","7e4e1397d5cff68aeba3595e671574fc");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("1ab95513a3abb5b760578831c61ef94b"));
|
||||
Arrays.asList("f576d86656cc37c0a869c7ac911f4c7c"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("314b99eb146de1fdafed872ecbe1cfc2"));
|
||||
Arrays.asList("94d7a907fdca7e8c9fd6bb8a87b2bab2"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "19bc6a74250ec19efc4e1b4ee6515ac0");
|
||||
testReducedCalling("INDEL", "22110b001e2d3dd45d7872334086b2b9");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"ffd69c410dca0d2f9fe75f3cb5d08179");
|
||||
"627b5a12f2f02a874fb39982171a3982");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -366,4 +366,12 @@ public class PerReadAlleleLikelihoodMap {
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an unmodifiable set of the unique alleles in this PerReadAlleleLikelihoodMap
|
||||
* @return a non-null unmodifiable map
|
||||
*/
|
||||
public Set<Allele> getAllelesSet() {
|
||||
return Collections.unmodifiableSet(allelesSet);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue