Bug fix in HC GenotypingEngine to ensure that all the merged complex events get properly added to the priority list used by VariantContextUtils when combining multiallelic events.

This commit is contained in:
Ryan Poplin 2012-09-09 10:36:09 -04:00
parent 81b27f9db2
commit 688fc9fb56
1 changed files with 25 additions and 11 deletions

View File

@ -52,7 +52,11 @@ public class GenotypingEngine {
noCall.add(Allele.NO_CALL);
}
// This function is the streamlined approach, currently not being used
// WARN
// This function is the streamlined approach, currently not being used by default
// WARN
// WARN: This function is currently only being used by Menachem. Slated for removal/merging with the rest of the code.
// WARN
@Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
final ArrayList<Haplotype> haplotypes,
@ -210,13 +214,9 @@ public class GenotypingEngine {
System.out.println( ">> Events = " + h.getEventMap());
}
}
// Create the VC merge priority list
final ArrayList<String> priorityList = new ArrayList<String>();
for( int iii = 0; iii < haplotypes.size(); iii++ ) {
priorityList.add("HC" + iii);
}
final ArrayList<String> priorityList = new ArrayList<String>(); // filled in later, used to merge overlapping events into common reference view
cleanUpSymbolicUnassembledEvents( haplotypes, priorityList );
cleanUpSymbolicUnassembledEvents( haplotypes );
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 3 ) { // if not in GGA mode and have at least 3 samples try to create MNP and complex events by looking at LD structure
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
}
@ -236,6 +236,7 @@ public class GenotypingEngine {
final VariantContext vc = eventMap.get(loc);
if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
eventsAtThisLoc.add(vc);
priorityList.add(vc.getSource());
}
}
} else { // we are in GGA mode!
@ -260,6 +261,22 @@ public class GenotypingEngine {
// Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
// Sanity check the priority list
for( final VariantContext vc : eventsAtThisLoc ) {
if( !priorityList.contains(vc.getSource()) ) {
throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles.");
}
}
for( final String name : priorityList ) {
boolean found = false;
for( final VariantContext vc : eventsAtThisLoc ) {
if(vc.getSource().equals(name)) { found = true; break; }
}
if( !found ) {
throw new ReviewedStingException("Event added to priority list but wasn't found on any haplotype. Something went wrong in the merging of alleles.");
}
}
// Merge the event to find a common reference representation
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; }
@ -299,9 +316,8 @@ public class GenotypingEngine {
return returnCalls;
}
protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes, final ArrayList<String> priorityList ) {
protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes ) {
final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
final ArrayList<String> stringsToRemove = new ArrayList<String>();
for( final Haplotype h : haplotypes ) {
for( final VariantContext vc : h.getEventMap().values() ) {
if( vc.isSymbolic() ) {
@ -309,7 +325,6 @@ public class GenotypingEngine {
for( final VariantContext vc2 : h2.getEventMap().values() ) {
if( vc.getStart() == vc2.getStart() && vc2.isIndel() ) {
haplotypesToRemove.add(h);
stringsToRemove.add(vc.getSource());
break;
}
}
@ -318,7 +333,6 @@ public class GenotypingEngine {
}
}
haplotypes.removeAll(haplotypesToRemove);
priorityList.removeAll(stringsToRemove);
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {