From ba949389c5a53d2422569783e50b3c24cad4949c Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Wed, 17 Dec 2014 14:02:24 -0500 Subject: [PATCH] matchHaplotypeAlleles() no longer calls alleleSegregationIsKnown(), added a TODO to investigate --- .../tools/walkers/phasing/PhasingUtils.java | 57 ++++++++++++------- .../walkers/phasing/PhasingUtilsUnitTest.java | 21 ++++--- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java index 6be2835d0..5167b2c57 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtils.java @@ -63,14 +63,28 @@ import htsjdk.variant.variantcontext.*; import java.util.*; /** - * Utility class for phasing analyis + * Utility class for phasing analysis */ class PhasingUtils { + + /** + * Merge variants into an MNP + * + * @param genomeLocParser parse the genome locations + * @param vc1 variant 1 + * @param vc2 variant 2 + * @param referenceFile sequence file containing the reference genome + * @param alleleMergeRule + * @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2, + * their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule + */ static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) { + + // Check if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2 if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2)) return null; - // Check that it's logically possible to merge the VCs: + // Check if variant genotypes have the same number of chromosomes, haplotype, number of attributes as chromosomes, and either genotype is homozygous if (!allSamplesAreMergeable(vc1, vc2)) return null; @@ -83,6 +97,8 @@ class PhasingUtils { /** * Find the alleles with the same haplotype + * assumes alleleSegregationIsKnown + * TODO - should alleleSegregationIsKnown be called within the method? * * @param gt1 genotype 1 * @param gt2 genotype 2 @@ -92,11 +108,6 @@ class PhasingUtils { final SameHaplotypeAlleles hapAlleles = new SameHaplotypeAlleles(); - // Skip if can not merge the haplotype alleles - if ( !alleleSegregationIsKnown(gt1, gt2) ){ - return hapAlleles; - } - // Get the alleles final int numAlleles = gt1.getPloidy(); final Allele[] site1AllelesArray = gt1.getAlleles().toArray(new Allele[numAlleles]); @@ -238,7 +249,7 @@ class PhasingUtils { * * @param vc1 * @param vc2 - * @return merged variant atrtributes + * @return merged variant attributes */ static Map mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) { Map mergedAttribs = new HashMap(); @@ -263,12 +274,13 @@ class PhasingUtils { } /** - * Check if variants can be moved into the MNP + * Check if variants can be merged into the MNP * * @param genomeLocParser parse the genome locations * @param vc1 variant 1 * @param vc2 variant 2 - * @return true if variants are an SNP or MNP, on the same contig, location 1 is not before location 2, unfiltered, from the same sample set and are called, + * @return true if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2, unfiltered, from the same sample set and are called, + * false otherwise */ static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) { // Can only merge "simple" base strings (i.e., SNPs or MNPs, but not indels): @@ -313,14 +325,14 @@ class PhasingUtils { } /** - * Check if can merge merge variants if phased or either is a homozygous + * Check if can merge variants * * @param vc1 variant 1 * @param vc2 variant 2 * @return true if variants are phased or either is a homozygous, false otherwise */ static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { - // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: + // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1 for (final Genotype gt1 : vc1.getGenotypes()) { final Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); @@ -348,7 +360,7 @@ class PhasingUtils { if (gt1.isHom() || gt2.isHom()) return true; - // If gt1 or gt2 do not read backed phasing haplotype, then can not be merged + // If gt1 or gt2 do not have a read backed phasing haplotype, then can not be merged if (!gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY) || !gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY)) return false; @@ -367,19 +379,23 @@ class PhasingUtils { } /** + * Check of any samples have alternate alleles * * @param vc1 variant 1 * @param vc2 variant 2 - * @return true if + * @return true if there is a sample with alternate alleles, false otherwise */ static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { for (final Genotype gt1 : vc1.getGenotypes()) { final Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); + // Find the alleles with the same haplotype final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2); + + // Find corresponding alternate alleles for (AlleleOneAndTwo all1all2 : hapAlleles.hapAlleles) { - if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference()) // corresponding alleles are alternate + if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference()) return true; } } @@ -388,6 +404,7 @@ class PhasingUtils { } /** + * Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference * * @param vc1 variant 1 * @param vc2 variant 2 @@ -395,7 +412,6 @@ class PhasingUtils { */ static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) { - // Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference): final Map allele1ToAllele2 = new HashMap(); final Map allele2ToAllele1 = new HashMap(); @@ -430,7 +446,7 @@ class PhasingUtils { } /** - * + * Class for rule for merging variants */ abstract static class AlleleMergeRule { // vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2): @@ -442,7 +458,7 @@ class PhasingUtils { } /** - * Class for sore the alleles with the same haplotype + * Class for storing the alleles with the same haplotype */ static class SameHaplotypeAlleles { @@ -481,7 +497,7 @@ class PhasingUtils { /** * Get the hah code for alleles 1 and 2 * - * @return hasb code for alleles 1 and 2 + * @return hash code for alleles 1 and 2 */ public int hashCode() { return all1.hashCode() + all2.hashCode(); @@ -540,7 +556,7 @@ class PhasingUtils { * @param all1 allele 1 * @param all2 allele 2 * @param creatingReferenceForFirstTime - * @return + * @return merged allele */ private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) { AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2); @@ -564,6 +580,7 @@ class PhasingUtils { } /** + * Get all merged alleles * * @return set of merged alleles values */ diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java index 6737ec255..547eccdaa 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/phasing/PhasingUtilsUnitTest.java @@ -70,21 +70,17 @@ public class PhasingUtilsUnitTest extends BaseTest { private final int start = 10; private ReferenceSequenceFile referenceFile; - private Allele ref; - private Allele alt; @BeforeClass public void init() throws FileNotFoundException { referenceFile = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); - ref = Allele.create("C", true); - alt = Allele.create("T", false); } @Test public void TestMatchHaplotypeAllelesKeyHP() { final List alleleList = new ArrayList<>(); - alleleList.add(Allele.create("T", true)); + alleleList.add(Allele.create("T", false)); alleleList.add(Allele.create("C", false)); String samelpName = "TC"; final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make(); @@ -110,9 +106,6 @@ public class PhasingUtilsUnitTest extends BaseTest { // Must match because the same genotype PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT); - for (int i = 0; i < sameHaplotypeAlleles.hapAlleles.size(); i++) - System.out.println(sameHaplotypeAlleles.hapAlleles.get(i)); - PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles(); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false))); @@ -124,10 +117,16 @@ public class PhasingUtilsUnitTest extends BaseTest { public void TestMergeIntoMNPvalidationCheck() { String source = new String("test"); String contig = new String("1"); - final VariantContext variantContextRef = new VariantContextBuilder(source, contig, start, start, Arrays.asList(ref)).make(); - final VariantContext variantContextAlt = new VariantContextBuilder(source, contig, start, start, Arrays.asList(alt)).make(); + final List alleleList1 = new ArrayList<>(); + alleleList1.add(Allele.create("T", true)); + alleleList1.add(Allele.create("C", false)); + final List alleleList2 = new ArrayList<>(); + alleleList2.add(Allele.create("A", true)); + alleleList2.add(Allele.create("CC", false)); + final VariantContext variantContext1 = new VariantContextBuilder(source, contig, start, start, alleleList1).make(); + final VariantContext variantContext2 = new VariantContextBuilder(source, contig, start, start, alleleList2).make(); GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile); - Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContextRef, variantContextAlt)); + Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContext1, variantContext1)); } }