diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index f3b7e68ec..7f19861bc 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -73,6 +73,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine NO_CALL = Collections.singletonList(Allele.NO_CALL); private static final int ALLELE_EXTENSION = 2; + private static final String phase01 = "0|1"; + private static final String phase10 = "1|0"; private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; @@ -404,14 +406,19 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine(uniqueCounter, callIsOnAllHaps ? "1|1" : "0|1")); - phaseSetMapping.put(comp, new Pair<>(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1")); + // An important note: even for homozygous variants we are setting the phase as "0|1" here. + // We do this because we cannot possibly know for sure at this time that the genotype for this + // sample will actually be homozygous downstream: there are steps in the pipeline that are liable + // to change the genotypes. Because we can't make those assumptions here, we have decided to output + // the phase as if the call is heterozygous and then "fix" it downstream as needed. + phaseSetMapping.put(call, new Pair<>(uniqueCounter, phase01)); + phaseSetMapping.put(comp, new Pair<>(uniqueCounter, phase01)); uniqueCounter++; } // otherwise it's part of an existing group so use that group's unique ID else if ( ! phaseSetMapping.containsKey(comp) ) { final Pair callPhase = phaseSetMapping.get(call); - phaseSetMapping.put(comp, new Pair<>(callPhase.first, compIsOnAllHaps ? "1|1" : callPhase.second)); + phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second)); } } // if the variants are apart on *all* haplotypes, record that fact @@ -431,14 +438,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine(uniqueCounter, "0|1")); - phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0")); + phaseSetMapping.put(call, new Pair<>(uniqueCounter, phase01)); + phaseSetMapping.put(comp, new Pair<>(uniqueCounter, phase10)); uniqueCounter++; } // otherwise it's part of an existing group so use that group's unique ID else if ( ! phaseSetMapping.containsKey(comp) ){ final Pair callPhase = phaseSetMapping.get(call); - phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second.equals("0|1") ? "1|0" : "0|1")); + phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second.equals(phase01) ? phase10 : phase01)); } } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java index 6d83bbbf0..16071e84a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFs.java @@ -52,6 +52,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine; import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList; import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList; import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils; +import org.broadinstitute.gatk.tools.walkers.haplotypecaller.HaplotypeCaller; import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.engine.CommandLineGATK; import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection; @@ -218,15 +219,7 @@ public class GenotypeGVCFs extends RodWalker attrs = new HashMap<>(originalVC.getAttributes()); - attrs.put(VCFConstants.MLE_ALLELE_COUNT_KEY, regenotypedVC.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); - attrs.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, regenotypedVC.getAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY)); - if (regenotypedVC.hasAttribute(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY)) - attrs.put(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, regenotypedVC.getAttribute(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY)); - - result = new VariantContextBuilder(regenotypedVC).attributes(attrs).make(); + result = addGenotypingAnnotations(originalVC.getAttributes(), regenotypedVC); } // if it turned monomorphic then we either need to ignore or fix such sites @@ -237,18 +230,47 @@ public class GenotypeGVCFs extends RodWalker originalAttributes, final VariantContext newVC) { + // we want to carry forward the attributes from the original VC but make sure to add the MLE-based annotations + final Map attrs = new HashMap<>(originalAttributes); + attrs.put(VCFConstants.MLE_ALLELE_COUNT_KEY, newVC.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); + attrs.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, newVC.getAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY)); + if (newVC.hasAttribute(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY)) + attrs.put(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, newVC.getAttribute(GenotypingEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY)); + + return new VariantContextBuilder(newVC).attributes(attrs).make(); + } + + /** * Cleans up genotype-level annotations that need to be updated. * 1. move MIN_DP to DP if present * 2. propagate DP to AD if not present * 3. remove SB if present + * 4. change the PGT value from "0|1" to "1|1" for homozygous variant genotypes * * @param VC the VariantContext with the Genotypes to fix * @param createRefGTs if true we will also create proper hom ref genotypes since we assume the site is monomorphic @@ -273,6 +295,11 @@ public class GenotypeGVCFs extends RodWalker(haplotypeMap), 2, 3, 1, 2, 1}); + // test 2 hets + haplotypeMap.remove(vc3); + tests.add(new Object[]{Arrays.asList(vc2, vc4), new HashMap<>(haplotypeMap), 1, 2, 1, 2, 0}); + // test 2 with opposite phase final Set haplotypes1 = new HashSet<>(); haplotypes1.add(pos1); haplotypeMap.put(vc1, haplotypes1); - haplotypeMap.remove(vc3); tests.add(new Object[]{Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2}); // test homs around a het @@ -498,7 +501,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { haplotypeMap.put(vc2, haplotypes2hom); haplotypeMap.put(vc3, haplotypes3het); haplotypeMap.put(vc4, haplotypes4hom); - tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 1, 0}); + tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0}); // test hets around a hom final Set haplotypes2het = new HashSet<>(); @@ -511,7 +514,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest { haplotypeMap.put(vc2, haplotypes2het); haplotypeMap.put(vc3, haplotypes3hom); haplotypeMap.put(vc4, haplotypes4het); - tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0}); + tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 3, 0}); // test no phased variants around a hom final Set haplotypes2incomplete = new HashSet<>(); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index 158533450..f4ebb596f 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -61,6 +61,15 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { return baseTestString(" -V " + privateTestDir + "gvcf.basepairResolution.vcf " + args, b37KGReference); } + @Test(enabled = true) + public void testUpdatePGT() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference), + 1, + Arrays.asList("bc3d3eff337836af245b81f52b94d70c")); + executeTest("testUpdatePGT", spec); + } + @Test(enabled = true) public void combineSingleSamplePipelineGVCF() { WalkerTestSpec spec = new WalkerTestSpec( @@ -81,7 +90,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference), 1, - Arrays.asList("f1dd2b9ab422c0b806392464e466ed91")); + Arrays.asList("4193e7c4e6fc889b4aa5a326899b1a4e")); executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec); }