Bugfix for bad AD values in UG/HC

-- In the case where we have multiple potential alternative alleles *and* we weren't calling all of them (so that n called < n potential values) we could end up trimming the alleles down, which would result in a mismatch between the PerReadAlleleLikelihoodMap alleles and the VariantContext trimmed alleles.
-- Fixed by doing two things (1) moving the trimming code after the annotation call and (2) updating AD annotation to check that the alleles in the VariantContext and the PerReadAlleleLikelihoodMap are concordant, which will stop us from degenerating in the future.
-- delivers [#50897077]
This commit is contained in:
Mark DePristo 2013-06-05 16:37:31 -04:00
parent c8845a2b63
commit 34bdf20132
9 changed files with 37 additions and 29 deletions

View File

@ -66,10 +66,7 @@ import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -135,20 +132,24 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
}
private void annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc, final GenotypeBuilder gb) {
final HashMap<Allele, Integer> alleleCounts = new HashMap<Allele, Integer>();
final Set<Allele> alleles = new HashSet<>(vc.getAlleles());
// make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext
if ( ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) )
throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet());
final HashMap<Allele, Integer> alleleCounts = new HashMap<>();
for ( final Allele allele : vc.getAlleles() ) { alleleCounts.put(allele, 0); }
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
}
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles);
if (! a.isInformative() ) continue; // read is non-informative
final GATKSAMRecord read = el.getKey();
final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
if (! a.isInformative() )
continue; // read is non-informative
if (!vc.getAlleles().contains(a.getMostLikelyAllele()))
continue; // sanity check - shouldn't be needed
alleleCounts.put(a.getMostLikelyAllele(), alleleCounts.get(a.getMostLikelyAllele()) + (read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1));
final int prevCount = alleleCounts.get(a.getMostLikelyAllele());
final int incCount = read.isReducedRead() ? read.getReducedCount(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, vc.getStart(), ReadUtils.ClippingTail.RIGHT_TAIL)) : 1;
alleleCounts.put(a.getMostLikelyAllele(), prevCount + incCount);
}
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference());
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)

View File

@ -543,11 +543,6 @@ public class UnifiedGenotyperEngine {
builder.attributes(attributes);
VariantContext vcCall = builder.make();
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
final ReadBackedPileup pileup = rawContext.getBasePileup();
@ -556,6 +551,11 @@ public class UnifiedGenotyperEngine {
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap);
}
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PoFGT0));
}

View File

@ -204,13 +204,12 @@ public class GenotypingEngine {
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
VariantContext annotatedCall = call;
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
}
annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, annotatedCall);
// maintain the set of all called haplotypes
for ( final Allele calledAllele : call.getAlleles() )
calledHaplotypes.addAll(alleleMapper.get(calledAllele));

View File

@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "66a5a3eb657fac5c621bc0c228ea9caf");
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "353c97bfb05a939b3838dc8eee50326b");
}
}

View File

@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14");
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","7e4e1397d5cff68aeba3595e671574fc");
}
@Test(enabled = true)

View File

@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("1ab95513a3abb5b760578831c61ef94b"));
Arrays.asList("f576d86656cc37c0a869c7ac911f4c7c"));
executeTest("test Multiple SNP alleles", spec);
}
@ -112,7 +112,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("314b99eb146de1fdafed872ecbe1cfc2"));
Arrays.asList("94d7a907fdca7e8c9fd6bb8a87b2bab2"));
executeTest("test reverse trim", spec);
}

View File

@ -74,7 +74,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "19bc6a74250ec19efc4e1b4ee6515ac0");
testReducedCalling("INDEL", "22110b001e2d3dd45d7872334086b2b9");
}

View File

@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"ffd69c410dca0d2f9fe75f3cb5d08179");
"627b5a12f2f02a874fb39982171a3982");
}
@Test

View File

@ -366,4 +366,12 @@ public class PerReadAlleleLikelihoodMap {
return true;
}
/**
 * Get an unmodifiable view of the unique alleles in this PerReadAlleleLikelihoodMap.
 *
 * Callers (e.g. the AD annotation) use this to verify that a VariantContext's alleles
 * are a subset of the alleles tracked by this map before counting reads per allele.
 *
 * @return a non-null unmodifiable set of alleles
 */
public Set<Allele> getAllelesSet() {
return Collections.unmodifiableSet(allelesSet);
}
}