From d3f06024f8beeb62b93c9b06fb6351a036c59055 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 15 Aug 2014 17:47:32 -0400 Subject: [PATCH] Updated the physical phasing in the Haplotype Caller to address requests from ATGU. 1. It is now turned on by default 2. It now phases homozygous variants 3. Most importantly, it also phases variants that are always on opposite haplotypes Changed the INFO keys to be PID and PGT, as described in the header. --- .../haplotypecaller/HaplotypeCaller.java | 21 ++- .../HaplotypeCallerGenotypingEngine.java | 81 +++++++--- .../HaplotypeCallerGVCFIntegrationTest.java | 12 +- ...plotypeCallerGenotypingEngineUnitTest.java | 151 +++++++++++------- 4 files changed, 173 insertions(+), 92 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java index cf34b1fb4..2ee5752f0 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -497,10 +497,11 @@ public class HaplotypeCaller extends ActiveRegionWalker, In protected boolean mergeVariantsViaLD = false; @Advanced - @Argument(fullName="tryPhysicalPhasing", shortName="tryPhysicalPhasing", doc="If specified, we will add physical (read-based) phasing information", required = false) - protected boolean tryPhysicalPhasing = false; + @Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="If specified, we will not try to add physical (read-based) phasing information", required = false) + protected boolean doNotRunPhysicalPhasing = false; - public static final String HAPLOTYPE_CALLER_PHASING_KEY = "HCP"; + public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID"; + public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT"; // ----------------------------------------------------------------------------------------------- // arguments for debugging / developing the haplotype caller @@ -634,12 +635,11 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if ( emitReferenceConfidence() ) { if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) - throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time"); + throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and GENOTYPE_GIVEN_ALLELES at the same time"); SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; - // also, we don't need to output several of the annotations annotationsToExclude.add("ChromosomeCounts"); annotationsToExclude.add("FisherStrand"); @@ -651,6 +651,9 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if (!SCAC.annotateAllSitesWithPLs) logger.info("All sites annotated with PLs forced to true for reference-model confidence output"); SCAC.annotateAllSitesWithPLs = true; + } else if ( ! doNotRunPhysicalPhasing ) { + doNotRunPhysicalPhasing = true; + logger.info("Disabling physical phasing, which is supported only for reference-model confidence output"); } if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) @@ -678,7 +681,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode ) throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); - genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, tryPhysicalPhasing); + genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing); // initialize the output VCF header final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); @@ -699,8 +702,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_PL_KEY); - if ( tryPhysicalPhasing ) - headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Physical phasing information, each unique ID within a given sample (but not across samples) connects alternate alleles as occurring on the same haplotype")); + if ( ! doNotRunPhysicalPhasing ) { + headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group")); + headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another")); + } // FILTER fields are added unconditionally as it's not always 100% certain the circumstances // where the filters are used. For example, in emitting all sites the lowQual field is used diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 0a78367d3..77c308cfc 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -57,6 +57,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.Utils; +import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import org.broadinstitute.gatk.utils.haplotype.EventMap; import org.broadinstitute.gatk.utils.haplotype.Haplotype; @@ -77,16 +78,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine sampleNames) { super(toolkit,configuration,sampleNames); - tryPhysicalPhasing = false; + doPhysicalPhasing = true; } @@ -254,7 +255,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1")); uniqueCounter++; } + // otherwise it's part of an existing group so use that group's unique ID + else if ( ! phaseSetMapping.containsKey(comp) ) { + final Pair callPhase = phaseSetMapping.get(call); + phaseSetMapping.put(comp, new Pair<>(callPhase.first, compIsOnAllHaps ? "1|1" : callPhase.second)); + } + } + // if the variants are apart on *all* haplotypes, record that fact + else if ( haplotypesWithCall.size() + haplotypesWithComp.size() == totalAvailableHaplotypes ) { + + final Set intersection = new HashSet<>(); + intersection.addAll(haplotypesWithCall); + intersection.retainAll(haplotypesWithComp); + if ( intersection.isEmpty() ) { + // create a new group if these are the first entries + if ( ! phaseSetMapping.containsKey(call) ) { + phaseSetMapping.put(call, new Pair<>(uniqueCounter, "0|1")); + phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0")); + uniqueCounter++; + } + // otherwise it's part of an existing group so use that group's unique ID + else if ( ! phaseSetMapping.containsKey(comp) ){ + final Pair callPhase = phaseSetMapping.get(call); + phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second.equals("0|1") ? "1|0" : "0|1")); + } + } } } } @@ -397,7 +428,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine constructPhaseGroups(final List originalCalls, - final Map phaseSetMapping, + final Map> phaseSetMapping, final int indexTo) { final List phasedCalls = new ArrayList<>(originalCalls); @@ -407,7 +438,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine indexes = new ArrayList<>(); for ( int index = 0; index < originalCalls.size(); index++ ) { final VariantContext call = originalCalls.get(index); - if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call) == count ) + if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call).first == count ) indexes.add(index); } if ( indexes.size() < 2 ) @@ -418,7 +449,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine phasedGenotypes = new ArrayList<>(); for ( final Genotype g : vc.getGenotypes() ) - phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY, ID).make()); + phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT).make()); return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make(); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index fca9b91fb..14924bdc3 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -68,11 +68,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { // this functionality can be adapted to provide input data for whatever you might want in your data tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9db87ae56df22456f3c08024384f3e5e"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4a006f9e39cacc3ae84384ad56ca22a2"}); - tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fe014592c9fa8d442508a96eb09e0c93"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90dc8950e28e042097817d785022482e"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"}); - tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "b01a2c222a3f00a675f5534c3b449919"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"}); return tests.toArray(new Object[][]{}); } @@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testWrongGVCFNonVariantRecordOrderBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d595dae0e3f993047c22c6e520e190aa")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e6a4e571abb59b925d59a38d244f0abe")); spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); } @@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { public void testNoCallGVCFMissingPLsBugFix() { final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("2a91768430a84dbae8d40242dee8c9b8")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("187e43b1034478edced1eb5c664bac34")); spec.disableShadowBCF(); executeTest("testNoCallGVCFMissingPLsBugFix", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java index 19f07eccd..635a5997d 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngineUnitTest.java @@ -56,6 +56,7 @@ import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.variant.variantcontext.*; import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.*; +import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.haplotype.EventMap; import org.broadinstitute.gatk.utils.haplotype.Haplotype; @@ -416,11 +417,15 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { final Allele altC = Allele.create("C", false); final Allele altT = Allele.create("T", false); + final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make(); final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make(); final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make(); final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make(); final List calls = Arrays.asList(vc2, vc3, vc4); + final Haplotype pos1 = new Haplotype("CAAAA".getBytes()); + pos1.setEventMap(new EventMap(Arrays.asList(vc1))); + pos1.getEventMap().put(1, vc1); final Haplotype pos2 = new Haplotype("ACAAA".getBytes()); pos2.setEventMap(new EventMap(Arrays.asList(vc2))); pos2.getEventMap().put(2, vc2); @@ -440,44 +445,69 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { pos234.getEventMap().put(3, vc3); pos234.getEventMap().put(4, vc4); - // test no phased variants - final Set haplotypes2NoPhase = new HashSet<>(); - haplotypes2NoPhase.add(pos2); - haplotypes2NoPhase.add(pos24); - final Set haplotypes3NoPhase = new HashSet<>(); - haplotypes3NoPhase.add(pos3); - final Set haplotypes4NoPhase = new HashSet<>(); - haplotypes4NoPhase.add(pos4); - haplotypes4NoPhase.add(pos24); - final Map> haplotypeMapNoPhase = new HashMap<>(); - haplotypeMapNoPhase.put(vc2, haplotypes2NoPhase); - haplotypeMapNoPhase.put(vc3, haplotypes3NoPhase); - haplotypeMapNoPhase.put(vc4, haplotypes4NoPhase); - tests.add(new Object[]{calls, haplotypeMapNoPhase, 0, 0}); + final Map> haplotypeMap = new HashMap<>(); - // test 2 phased variants - final Set haplotypes2SomePhase = new HashSet<>(); - haplotypes2SomePhase.add(pos24); - final Set haplotypes4SomePhase = new HashSet<>(); - haplotypes4SomePhase.add(pos24); - final Map> haplotypeMapSomePhase = new HashMap<>(); - haplotypeMapSomePhase.put(vc2, haplotypes2SomePhase); - haplotypeMapSomePhase.put(vc3, haplotypes3NoPhase); - haplotypeMapSomePhase.put(vc4, haplotypes4SomePhase); - tests.add(new Object[]{calls, haplotypeMapSomePhase, 2, 1}); + // test no phased variants #1 + final Set haplotypes2 = new HashSet<>(); + haplotypes2.add(pos2); + haplotypeMap.put(vc2, haplotypes2); + tests.add(new Object[]{Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0}); - // test all phased variants - final Set haplotypes2AllPhase = new HashSet<>(); - haplotypes2AllPhase.add(pos234); - final Set haplotypes3AllPhase = new HashSet<>(); - haplotypes3AllPhase.add(pos234); - final Set haplotypes4AllPhase = new HashSet<>(); - haplotypes4AllPhase.add(pos234); - final Map> haplotypeMapAllPhase = new HashMap<>(); - haplotypeMapAllPhase.put(vc2, haplotypes2AllPhase); - haplotypeMapAllPhase.put(vc3, haplotypes3AllPhase); - haplotypeMapAllPhase.put(vc4, haplotypes4AllPhase); - tests.add(new Object[]{calls, haplotypeMapAllPhase, 3, 1}); + // test no phased variants #2 + final Set haplotypes3 = new HashSet<>(); + haplotypes3.add(pos3); + haplotypeMap.put(vc3, haplotypes3); + tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0}); + + // test opposite phase + tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1}); + + // test no phased variants #3 + final Set haplotypes4 = new HashSet<>(); + haplotypes4.add(pos4); + haplotypeMap.put(vc4, haplotypes4); + tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0}); + + // test mixture + final Set haplotypes24 = new HashSet<>(); + haplotypes24.add(pos24); + haplotypeMap.put(vc2, haplotypes24); + haplotypeMap.put(vc4, haplotypes24); + tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1}); + + // test 2 with opposite phase + final Set haplotypes1 = new HashSet<>(); + haplotypes1.add(pos1); + haplotypeMap.put(vc1, haplotypes1); + haplotypeMap.remove(vc3); + tests.add(new Object[]{Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2}); + + // test homs around a het + final Set haplotypes2hom = new HashSet<>(); + haplotypes2hom.add(pos24); + haplotypes2hom.add(pos234); + final Set haplotypes4hom = new HashSet<>(); + haplotypes4hom.add(pos24); + haplotypes4hom.add(pos234); + final Set haplotypes3het = new HashSet<>(); + haplotypes3het.add(pos234); + haplotypeMap.put(vc2, haplotypes2hom); + haplotypeMap.put(vc3, haplotypes3het); + haplotypeMap.put(vc4, haplotypes4hom); + tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 1, 0}); + + // test hets around a hom + final Set haplotypes2het = new HashSet<>(); + haplotypes2het.add(pos234); + final Set haplotypes4het = new HashSet<>(); + haplotypes4het.add(pos234); + final Set haplotypes3hom = new HashSet<>(); + haplotypes3hom.add(pos3); + haplotypes3hom.add(pos234); + haplotypeMap.put(vc2, haplotypes2het); + haplotypeMap.put(vc3, haplotypes3hom); + haplotypeMap.put(vc4, haplotypes4het); + tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0}); return tests.toArray(new Object[][]{}); } @@ -485,12 +515,25 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { @Test(dataProvider="ConstructPhaseSetMappingProvider") public void testConstructPhaseSetMapping(final List calls, final Map> haplotypeMap, + final int totalHaplotypes, final int expectedMapSize, - final int expectedNumGroups) { - final Map actualPhaseSetMapping = new HashMap<>(); - final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, actualPhaseSetMapping); + final int expectedNumGroups, + final int expectedNum01, + final int expectedNum10) { + final Map> actualPhaseSetMapping = new HashMap<>(); + final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, totalHaplotypes, actualPhaseSetMapping); Assert.assertEquals(actualNumGroups, expectedNumGroups); Assert.assertEquals(actualPhaseSetMapping.size(), expectedMapSize); + + int num01 = 0, num10 = 0; + for ( final Pair phase : actualPhaseSetMapping.values() ) { + if ( phase.second.equals("0|1") ) + num01++; + else if ( phase.second.equals("1|0") ) + num10++; + } + Assert.assertEquals(num01, expectedNum01); + Assert.assertEquals(num10, expectedNum10); } @DataProvider(name = "ConstructPhaseGroupsProvider") @@ -509,27 +552,27 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { final List calls = Arrays.asList(vc1, vc2, vc3); // test no phased variants, empty map - final Map nonePhased1 = new HashMap<>(); + final Map> nonePhased1 = new HashMap<>(); tests.add(new Object[]{calls, nonePhased1, 0, 0, 0}); // test no phased variants, full map, exception expected - final Map nonePhased2 = new HashMap<>(); - nonePhased2.put(vc1, 0); - nonePhased2.put(vc2, 1); - nonePhased2.put(vc3, 2); + final Map> nonePhased2 = new HashMap<>(); + nonePhased2.put(vc1, new Pair<>(0, "0/1")); + nonePhased2.put(vc2, new Pair<>(1, "0/1")); + nonePhased2.put(vc3, new Pair<>(2, "0/1")); tests.add(new Object[]{calls, nonePhased2, 3, -1, -1}); // test 2 phased variants - final Map twoPhased = new HashMap<>(); - twoPhased.put(vc1, 0); - twoPhased.put(vc2, 0); + final Map> twoPhased = new HashMap<>(); + twoPhased.put(vc1, new Pair<>(0, "0/1")); + twoPhased.put(vc2, new Pair<>(0, "0/1")); tests.add(new Object[]{calls, twoPhased, 1, 1, 2}); // test all phased variants - final Map allPhased = new HashMap<>(); - allPhased.put(vc1, 0); - allPhased.put(vc2, 0); - allPhased.put(vc3, 0); + final Map> allPhased = new HashMap<>(); + allPhased.put(vc1, new Pair<>(0, "0/1")); + allPhased.put(vc2, new Pair<>(0, "0/1")); + allPhased.put(vc3, new Pair<>(0, "0/1")); tests.add(new Object[]{calls, allPhased, 1, 1, 3}); return tests.toArray(new Object[][]{}); @@ -537,7 +580,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { @Test(dataProvider="ConstructPhaseGroupsProvider") public void testConstructPhaseGroups(final List calls, - final Map phaseMap, + final Map> phaseMap, final int endIndex, final int expectedNumGroups, final int expectedGroupSize) { @@ -553,8 +596,8 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { int counter = 0; for ( final VariantContext call : actualPhasedCalls ) { for ( final Genotype g : call.getGenotypes() ) { - if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY) ) { - uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY).toString()); + if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY) ) { + uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY).toString()); counter++; } }