Fixing bug in the physical phasing code, found by Valentin.
It turns out that there can be some really complex situations even with a single sample where there are lots of unphasable hets around a hom. Previously we were trying to phase each of the hets against the hom, but that wasn't correct. Instead we now detect that situation and don't attempt to phase anything. Added a unit test to cover this situation.
This commit is contained in:
parent
78c2da1fef
commit
b1cb6196be
|
|
@ -348,7 +348,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
* @param originalCalls the original unphased calls
|
||||
* @param haplotypeMap mapping from alternate allele to the set of haplotypes that contain that allele
|
||||
* @param totalAvailableHaplotypes the total number of possible haplotypes used in calling
|
||||
* @param phaseSetMapping the map to populate in this method
|
||||
* @param phaseSetMapping the map to populate in this method;
|
||||
* note that it is okay for this method NOT to populate the phaseSetMapping at all (e.g. in an impossible-to-phase situation)
|
||||
* @return the next incremental unique index
|
||||
*/
|
||||
protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
|
||||
|
|
@ -366,6 +367,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
if ( haplotypesWithCall.isEmpty() )
|
||||
continue;
|
||||
|
||||
final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
|
||||
|
||||
for ( int j = i+1; j < numCalls; j++ ) {
|
||||
final VariantContext comp = originalCalls.get(j);
|
||||
final Set<Haplotype> haplotypesWithComp = haplotypeMap.get(comp);
|
||||
|
|
@ -374,12 +377,19 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
|
||||
// if the variants are together on all haplotypes, record that fact.
|
||||
// another possibility is that one of the variants is on all possible haplotypes (i.e. it is homozygous).
|
||||
final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
|
||||
final boolean compIsOnAllHaps = haplotypesWithComp.size() == totalAvailableHaplotypes;
|
||||
if ( (haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp)) || callIsOnAllHaps || compIsOnAllHaps ) {
|
||||
|
||||
// create a new group if these are the first entries
|
||||
if ( ! phaseSetMapping.containsKey(call) ) {
|
||||
// note that if the comp is already in the map then that is very bad because it means that there is
|
||||
// another variant that is in phase with the comp but not with the call. Since that's an un-phasable
|
||||
// situation, we should abort if we encounter it.
|
||||
if ( phaseSetMapping.containsKey(comp) ) {
|
||||
phaseSetMapping.clear();
|
||||
return 0;
|
||||
}
|
||||
|
||||
phaseSetMapping.put(call, new Pair<>(uniqueCounter, callIsOnAllHaps ? "1|1" : "0|1"));
|
||||
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1"));
|
||||
uniqueCounter++;
|
||||
|
|
@ -399,6 +409,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
if ( intersection.isEmpty() ) {
|
||||
// create a new group if these are the first entries
|
||||
if ( ! phaseSetMapping.containsKey(call) ) {
|
||||
// note that if the comp is already in the map then that is very bad because it means that there is
|
||||
// another variant that is in phase with the comp but not with the call. Since that's an un-phasable
|
||||
// situation, we should abort if we encounter it.
|
||||
if ( phaseSetMapping.containsKey(comp) ) {
|
||||
phaseSetMapping.clear();
|
||||
return 0;
|
||||
}
|
||||
|
||||
phaseSetMapping.put(call, new Pair<>(uniqueCounter, "0|1"));
|
||||
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0"));
|
||||
uniqueCounter++;
|
||||
|
|
|
|||
|
|
@ -439,6 +439,10 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
|
|||
pos24.setEventMap(new EventMap(Arrays.asList(vc2, vc4)));
|
||||
pos24.getEventMap().put(2, vc2);
|
||||
pos24.getEventMap().put(4, vc4);
|
||||
final Haplotype pos34 = new Haplotype("AACCA".getBytes());
|
||||
pos34.setEventMap(new EventMap(Arrays.asList(vc3, vc4)));
|
||||
pos34.getEventMap().put(3, vc3);
|
||||
pos34.getEventMap().put(4, vc4);
|
||||
final Haplotype pos234 = new Haplotype("ACCCA".getBytes());
|
||||
pos234.setEventMap(new EventMap(Arrays.asList(vc2, vc3, vc4)));
|
||||
pos234.getEventMap().put(2, vc2);
|
||||
|
|
@ -509,6 +513,20 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
|
|||
haplotypeMap.put(vc4, haplotypes4het);
|
||||
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0});
|
||||
|
||||
// test no phased variants around a hom
|
||||
final Set<Haplotype> haplotypes2incomplete = new HashSet<>();
|
||||
haplotypes2incomplete.add(pos24);
|
||||
final Set<Haplotype> haplotypes3incomplete = new HashSet<>();
|
||||
haplotypes3incomplete.add(pos34);
|
||||
final Set<Haplotype> haplotypes4complete = new HashSet<>();
|
||||
haplotypes4complete.add(pos24);
|
||||
haplotypes4complete.add(pos34);
|
||||
haplotypes4complete.add(pos234);
|
||||
haplotypeMap.put(vc2, haplotypes2incomplete);
|
||||
haplotypeMap.put(vc3, haplotypes3incomplete);
|
||||
haplotypeMap.put(vc4, haplotypes4complete);
|
||||
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 0, 0, 0, 0, 0});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue