Bug fix in the HC's allele mapping for multi-allelic events. Using the allele alone as a key isn't sufficient because alleles change when the reference allele changes during VariantContextUtils.simpleMerge for multi-allelic events.
This commit is contained in:
parent
d3c2c97fb2
commit
4f95f850b3
|
|
@ -59,16 +59,16 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
|
||||
public List<VariantContext> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
|
||||
final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
public List<VariantContext> assignGenotypeLikelihoods( final UnifiedGenotyperEngine UG_engine,
|
||||
final List<Haplotype> haplotypes,
|
||||
final List<String> samples,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
|
||||
final List<VariantContext> returnCalls = new ArrayList<VariantContext>();
|
||||
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
|
||||
|
|
@ -120,7 +120,7 @@ public class GenotypingEngine {
|
|||
priorityList.clear();
|
||||
int alleleCount = 0;
|
||||
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
|
||||
HashSet<Allele> alleleSet = new HashSet<Allele>(2);
|
||||
ArrayList<Allele> alleleSet = new ArrayList<Allele>(2);
|
||||
alleleSet.add(compVC.getReference());
|
||||
alleleSet.add(compAltAllele);
|
||||
priorityList.add("Allele" + alleleCount);
|
||||
|
|
@ -133,45 +133,26 @@ public class GenotypingEngine {
|
|||
|
||||
if( eventsAtThisLoc.isEmpty() ) { continue; }
|
||||
|
||||
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
|
||||
Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
|
||||
// Create the event mapping object which maps the original haplotype events to the events present at just this locus
|
||||
final Map<Event, List<Haplotype>> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes);
|
||||
|
||||
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
|
||||
final ArrayList<Allele> alleleOrdering = new ArrayList<Allele>(alleleMapper.size());
|
||||
alleleOrdering.add(refAllele);
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
alleleOrdering.add(vc.getAlternateAllele(0));
|
||||
}
|
||||
|
||||
// Sanity check the priority list
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
if( !priorityList.contains(vc.getSource()) ) {
|
||||
throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles.");
|
||||
}
|
||||
}
|
||||
for( final String name : priorityList ) {
|
||||
boolean found = false;
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
if(vc.getSource().equals(name)) { found = true; break; }
|
||||
}
|
||||
if( !found ) {
|
||||
throw new ReviewedStingException("Event added to priority list but wasn't found on any haplotype. Something went wrong in the merging of alleles.");
|
||||
}
|
||||
}
|
||||
// Sanity check the priority list for mistakes
|
||||
sanityCheckPriorityList( priorityList, eventsAtThisLoc );
|
||||
|
||||
// Merge the event to find a common reference representation
|
||||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
// let's update the Allele keys in the mapper because they can change after merging when there are complex events
|
||||
final Map<Allele, List<Haplotype>> updatedAlleleMapper = new HashMap<Allele, List<Haplotype>>(alleleMapper.size());
|
||||
for ( int i = 0; i < mergedVC.getNAlleles(); i++ ) {
|
||||
final Allele oldAllele = alleleOrdering.get(i);
|
||||
final Allele newAllele = mergedVC.getAlleles().get(i);
|
||||
updatedAlleleMapper.put(newAllele, alleleMapper.get(oldAllele));
|
||||
alleleOrdering.set(i, newAllele);
|
||||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
throw new ReviewedStingException("Something went wrong in the merging of alleles.");
|
||||
}
|
||||
alleleMapper = updatedAlleleMapper;
|
||||
final HashMap<VariantContext, Allele> mergeMap = new HashMap<VariantContext, Allele>();
|
||||
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
|
||||
for(int iii = 0; iii < mergedVC.getAlternateAlleles().size(); iii++) {
|
||||
mergeMap.put(eventsAtThisLoc.get(iii), mergedVC.getAlternateAllele(iii)); // BUGBUG: This is assuming that the order of alleles is the same as the priority list given to simpleMerge function
|
||||
}
|
||||
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
|
||||
|
||||
if( DEBUG ) {
|
||||
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
|
||||
|
|
@ -180,20 +161,7 @@ public class GenotypingEngine {
|
|||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
||||
|
||||
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
|
||||
final GenotypesContext genotypes = GenotypesContext.create(samples.size());
|
||||
for( final String sample : samples ) {
|
||||
final int numHaplotypes = alleleMapper.size();
|
||||
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
|
||||
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleReadMap, alleleOrdering);
|
||||
int glIndex = 0;
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
for( int jjj = 0; jjj <= iii; jjj++ ) {
|
||||
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
|
||||
}
|
||||
}
|
||||
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
|
||||
}
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent( samples, alleleReadMap, mergedVC );
|
||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
|
||||
if( call != null ) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
||||
|
|
@ -212,6 +180,41 @@ public class GenotypingEngine {
|
|||
return returnCalls;
|
||||
}
|
||||
|
||||
private GenotypesContext calculateGLsForThisEvent( final List<String> samples, final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap, final VariantContext mergedVC ) {
|
||||
final GenotypesContext genotypes = GenotypesContext.create(samples.size());
|
||||
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
|
||||
for( final String sample : samples ) {
|
||||
final int numHaplotypes = mergedVC.getAlleles().size();
|
||||
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
|
||||
final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleReadMap, mergedVC.getAlleles());
|
||||
int glIndex = 0;
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
for( int jjj = 0; jjj <= iii; jjj++ ) {
|
||||
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
|
||||
}
|
||||
}
|
||||
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
|
||||
}
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
private void sanityCheckPriorityList( final ArrayList<String> priorityList, final ArrayList<VariantContext> eventsAtThisLoc ) {
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
if( !priorityList.contains(vc.getSource()) ) {
|
||||
throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles.");
|
||||
}
|
||||
}
|
||||
for( final String name : priorityList ) {
|
||||
boolean found = false;
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
if(vc.getSource().equals(name)) { found = true; break; }
|
||||
}
|
||||
if( !found ) {
|
||||
throw new ReviewedStingException("Event added to priority list but wasn't found on any haplotype. Something went wrong in the merging of alleles.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, PerReadAlleleLikelihoodMap> filterToOnlyOverlappingReads( final GenomeLocParser parser,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
|
||||
final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
|
||||
|
|
@ -437,27 +440,45 @@ public class GenotypingEngine {
|
|||
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
|
||||
}
|
||||
|
||||
protected static Map<Allele, List<Haplotype>> createAlleleMapper( final Map<VariantContext, Allele> mergeMap, final Map<Event, List<Haplotype>> eventMap ) {
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>();
|
||||
for( final Map.Entry<VariantContext, Allele> entry : mergeMap.entrySet() ) {
|
||||
alleleMapper.put(entry.getValue(), eventMap.get(new Event(entry.getKey())));
|
||||
}
|
||||
return alleleMapper;
|
||||
}
|
||||
|
||||
@Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
|
||||
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
|
||||
protected static Map<Allele, List<Haplotype>> createAlleleMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
|
||||
protected static Map<Event, List<Haplotype>> createEventMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
|
||||
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
final Map<Event, List<Haplotype>> eventMapper = new HashMap<Event, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
|
||||
alleleMapper.put(refAllele, new ArrayList<Haplotype>());
|
||||
for( final VariantContext vc : eventsAtThisLoc )
|
||||
alleleMapper.put(vc.getAlternateAllele(0), new ArrayList<Haplotype>());
|
||||
VariantContext refVC = eventsAtThisLoc.get(0);
|
||||
eventMapper.put(new Event(null), new ArrayList<Haplotype>());
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
eventMapper.put(new Event(vc), new ArrayList<Haplotype>());
|
||||
}
|
||||
|
||||
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
|
||||
alleleMapper.get(h.getArtificialAllele()).add(h);
|
||||
if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) {
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(refAllele);
|
||||
alleles.add(h.getArtificialAllele());
|
||||
final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype")
|
||||
.alleles(alleles)
|
||||
.loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + refAllele.length() - 1).make() );
|
||||
if( eventMapper.containsKey(artificialVC) ) {
|
||||
eventMapper.get(artificialVC).add(h);
|
||||
}
|
||||
} else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later
|
||||
undeterminedHaplotypes.add(h);
|
||||
} else {
|
||||
boolean haplotypeIsDetermined = false;
|
||||
for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
|
||||
if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
|
||||
alleleMapper.get(vcAtThisLoc.getAlternateAllele(0)).add(h);
|
||||
eventMapper.get(new Event(vcAtThisLoc)).add(h);
|
||||
haplotypeIsDetermined = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -469,25 +490,23 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
for( final Haplotype h : undeterminedHaplotypes ) {
|
||||
Allele matchingAllele = null;
|
||||
for( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
|
||||
Event matchingEvent = new Event(null);
|
||||
for( final Map.Entry<Event, List<Haplotype>> eventToTest : eventMapper.entrySet() ) {
|
||||
// don't test against the reference allele
|
||||
if( alleleToTest.getKey().equals(refAllele) )
|
||||
if( eventToTest.getKey().equals(new Event(null)) )
|
||||
continue;
|
||||
|
||||
final Haplotype artificialHaplotype = alleleToTest.getValue().get(0);
|
||||
final Haplotype artificialHaplotype = eventToTest.getValue().get(0);
|
||||
if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) {
|
||||
matchingAllele = alleleToTest.getKey();
|
||||
matchingEvent = eventToTest.getKey();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( matchingAllele == null )
|
||||
matchingAllele = refAllele;
|
||||
alleleMapper.get(matchingAllele).add(h);
|
||||
eventMapper.get(matchingEvent).add(h);
|
||||
}
|
||||
|
||||
return alleleMapper;
|
||||
return eventMapper;
|
||||
}
|
||||
|
||||
protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset, final boolean resolveSupersetToSubset) {
|
||||
|
|
@ -636,4 +655,22 @@ public class GenotypingEngine {
|
|||
}
|
||||
return vcs;
|
||||
}
|
||||
|
||||
private static class Event {
|
||||
public VariantContext vc;
|
||||
|
||||
public Event( final VariantContext vc ) {
|
||||
this.vc = vc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals( final Object obj ) {
|
||||
return obj instanceof Event && ((((Event) obj).vc == null && vc == null) || (((Event) obj).vc != null && vc != null && ((Event) obj).vc.hasSameAllelesAs(vc))) ;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (vc == null ? -1 : vc.getAlleles().hashCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -416,16 +416,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
|
||||
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap ) : haplotypes );
|
||||
|
||||
for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine,
|
||||
bestHaplotypes,
|
||||
samplesList,
|
||||
stratifiedReadMap,
|
||||
perSampleFilteredReadList,
|
||||
fullReferenceWithPadding,
|
||||
getPaddedLoc(activeRegion),
|
||||
activeRegion.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
activeAllelesToGenotype ) ) {
|
||||
for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoods( UG_engine,
|
||||
bestHaplotypes,
|
||||
samplesList,
|
||||
stratifiedReadMap,
|
||||
perSampleFilteredReadList,
|
||||
fullReferenceWithPadding,
|
||||
getPaddedLoc(activeRegion),
|
||||
activeRegion.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
activeAllelesToGenotype ) ) {
|
||||
vcfWriter.add( call );
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "47fdbe5f01d3ce5e53056eea8c488e45");
|
||||
HCTest(CEUTRIO_BAM, "", "35c8425b44429ac7468c2cd26f8f5a42");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -33,7 +33,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"54b7cc3da3d8349ff4302f99883ab188");
|
||||
"d918d25b22a551cae5d70ea30d7feed1");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -72,7 +72,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ece627de486aee69d02872891c6cb0ff"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2e8e6313228b0387008437feae7f5469"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -499,7 +499,6 @@ public class VariantContextUtils {
|
|||
final VariantContext first = VCs.get(0);
|
||||
final String name = first.getSource();
|
||||
final Allele refAllele = determineReferenceAllele(VCs);
|
||||
Byte referenceBaseForIndel = null;
|
||||
|
||||
final Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||
final Set<String> filters = new HashSet<String>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue