Merge pull request #712 from broadinstitute/eb_physical_phasing_bug_PT77248992

Fixing bug in the physical phasing code, found by Valentin.
This commit is contained in:
Eric Banks 2014-08-21 15:25:51 -04:00
commit 36bdfa3918
2 changed files with 38 additions and 2 deletions

View File

@ -362,7 +362,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @param originalCalls the original unphased calls
* @param haplotypeMap mapping from alternate allele to the set of haplotypes that contain that allele
* @param totalAvailableHaplotypes the total number of possible haplotypes used in calling
* @param phaseSetMapping the map to populate in this method
* @param phaseSetMapping the map to populate in this method;
* note that it is okay for this method NOT to populate the phaseSetMapping at all (e.g. in an impossible-to-phase situation)
* @return the next incremental unique index
*/
protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
@ -380,6 +381,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if ( haplotypesWithCall.isEmpty() )
continue;
final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
for ( int j = i+1; j < numCalls; j++ ) {
final VariantContext comp = originalCalls.get(j);
final Set<Haplotype> haplotypesWithComp = haplotypeMap.get(comp);
@ -388,12 +391,19 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// if the variants are together on all haplotypes, record that fact.
// another possibility is that one of the variants is on all possible haplotypes (i.e. it is homozygous).
final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
final boolean compIsOnAllHaps = haplotypesWithComp.size() == totalAvailableHaplotypes;
if ( (haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp)) || callIsOnAllHaps || compIsOnAllHaps ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
// note that if the comp is already in the map then that is very bad because it means that there is
// another variant that is in phase with the comp but not with the call. Since that's an un-phasable
// situation, we should abort if we encounter it.
if ( phaseSetMapping.containsKey(comp) ) {
phaseSetMapping.clear();
return 0;
}
phaseSetMapping.put(call, new Pair<>(uniqueCounter, callIsOnAllHaps ? "1|1" : "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1"));
uniqueCounter++;
@ -413,6 +423,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if ( intersection.isEmpty() ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
// note that if the comp is already in the map then that is very bad because it means that there is
// another variant that is in phase with the comp but not with the call. Since that's an un-phasable
// situation, we should abort if we encounter it.
if ( phaseSetMapping.containsKey(comp) ) {
phaseSetMapping.clear();
return 0;
}
phaseSetMapping.put(call, new Pair<>(uniqueCounter, "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0"));
uniqueCounter++;

View File

@ -439,6 +439,10 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc4)));
pos24.getEventMap().put(2, vc2);
pos24.getEventMap().put(4, vc4);
final Haplotype pos34 = new Haplotype("AACCA".getBytes());
pos34.setEventMap(new EventMap(Arrays.asList(vc3, vc4)));
pos34.getEventMap().put(3, vc3);
pos34.getEventMap().put(4, vc4);
final Haplotype pos234 = new Haplotype("ACCCA".getBytes());
pos234.setEventMap(new EventMap(Arrays.asList(vc2, vc3, vc4)));
pos234.getEventMap().put(2, vc2);
@ -509,6 +513,20 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
haplotypeMap.put(vc4, haplotypes4het);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0});
// test no phased variants around a hom
final Set<Haplotype> haplotypes2incomplete = new HashSet<>();
haplotypes2incomplete.add(pos24);
final Set<Haplotype> haplotypes3incomplete = new HashSet<>();
haplotypes3incomplete.add(pos34);
final Set<Haplotype> haplotypes4complete = new HashSet<>();
haplotypes4complete.add(pos24);
haplotypes4complete.add(pos34);
haplotypes4complete.add(pos234);
haplotypeMap.put(vc2, haplotypes2incomplete);
haplotypeMap.put(vc3, haplotypes3incomplete);
haplotypeMap.put(vc4, haplotypes4complete);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 0, 0, 0, 0, 0});
return tests.toArray(new Object[][]{});
}