matchHaplotypeAlleles() no longer calls alleleSegregationIsKnown(), added a TODO to investigate

This commit is contained in:
Ron Levine 2014-12-17 14:02:24 -05:00
parent 56f8e4f9cf
commit ba949389c5
2 changed files with 47 additions and 31 deletions

View File

@ -63,14 +63,28 @@ import htsjdk.variant.variantcontext.*;
import java.util.*; import java.util.*;
/** /**
* Utility class for phasing analyis * Utility class for phasing analysis
*/ */
class PhasingUtils { class PhasingUtils {
/**
* Merge variants into an MNP
*
* @param genomeLocParser parse the genome locations
* @param vc1 variant 1
* @param vc2 variant 2
* @param referenceFile sequence file containing the reference genome
* @param alleleMergeRule
* @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2,
* their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule
*/
static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) { static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
// Check if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2)) if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
return null; return null;
// Check that it's logically possible to merge the VCs: // Check if variant genotypes have the same number of chromosomes, haplotype, number of attributes as chromosomes, and either genotype is homozygous
if (!allSamplesAreMergeable(vc1, vc2)) if (!allSamplesAreMergeable(vc1, vc2))
return null; return null;
@ -83,6 +97,8 @@ class PhasingUtils {
/** /**
* Find the alleles with the same haplotype * Find the alleles with the same haplotype
* assumes alleleSegregationIsKnown
* TODO - should alleleSegregationIsKnown be called within the method?
* *
* @param gt1 genotype 1 * @param gt1 genotype 1
* @param gt2 genotype 2 * @param gt2 genotype 2
@ -92,11 +108,6 @@ class PhasingUtils {
final SameHaplotypeAlleles hapAlleles = new SameHaplotypeAlleles(); final SameHaplotypeAlleles hapAlleles = new SameHaplotypeAlleles();
// Skip if can not merge the haplotype alleles
if ( !alleleSegregationIsKnown(gt1, gt2) ){
return hapAlleles;
}
// Get the alleles // Get the alleles
final int numAlleles = gt1.getPloidy(); final int numAlleles = gt1.getPloidy();
final Allele[] site1AllelesArray = gt1.getAlleles().toArray(new Allele[numAlleles]); final Allele[] site1AllelesArray = gt1.getAlleles().toArray(new Allele[numAlleles]);
@ -238,7 +249,7 @@ class PhasingUtils {
* *
* @param vc1 * @param vc1
* @param vc2 * @param vc2
* @return merged variant atrtributes * @return merged variant attributes
*/ */
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) { static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
Map<String, Object> mergedAttribs = new HashMap<String, Object>(); Map<String, Object> mergedAttribs = new HashMap<String, Object>();
@ -263,12 +274,13 @@ class PhasingUtils {
} }
/** /**
* Check if variants can be moved into the MNP * Check if variants can be merged into the MNP
* *
* @param genomeLocParser parse the genome locations * @param genomeLocParser parse the genome locations
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
* @return true if variants are an SNP or MNP, on the same contig, location 1 is not before location 2, unfiltered, from the same sample set and are called, * @return true if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2, unfiltered, from the same sample set and are called,
* false otherwise
*/ */
static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) { static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
// Can only merge "simple" base strings (i.e., SNPs or MNPs, but not indels): // Can only merge "simple" base strings (i.e., SNPs or MNPs, but not indels):
@ -313,14 +325,14 @@ class PhasingUtils {
} }
/** /**
* Check if can merge merge variants if phased or either is a homozygous * Check if can merge variants
* *
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
* @return true if variants are phased or either is a homozygous, false otherwise * @return true if variants are phased or either is a homozygous, false otherwise
*/ */
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) { static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1: // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1
for (final Genotype gt1 : vc1.getGenotypes()) { for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
@ -348,7 +360,7 @@ class PhasingUtils {
if (gt1.isHom() || gt2.isHom()) if (gt1.isHom() || gt2.isHom())
return true; return true;
// If gt1 or gt2 do not read backed phasing haplotype, then can not be merged // If gt1 or gt2 do not have a read backed phasing haplotype, then can not be merged
if (!gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY) || !gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY)) if (!gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY) || !gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY))
return false; return false;
@ -367,19 +379,23 @@ class PhasingUtils {
} }
/** /**
* Check of any samples have alternate alleles
* *
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
* @return true if * @return true if there is a sample with alternate alleles, false otherwise
*/ */
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
for (final Genotype gt1 : vc1.getGenotypes()) { for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
// Find the alleles with the same haplotype
final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2); final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2);
// Find corresponding alternate alleles
for (AlleleOneAndTwo all1all2 : hapAlleles.hapAlleles) { for (AlleleOneAndTwo all1all2 : hapAlleles.hapAlleles) {
if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference()) // corresponding alleles are alternate if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference())
return true; return true;
} }
} }
@ -388,6 +404,7 @@ class PhasingUtils {
} }
/** /**
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference
* *
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
@ -395,7 +412,6 @@ class PhasingUtils {
*/ */
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) { static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>(); final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>(); final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
@ -430,7 +446,7 @@ class PhasingUtils {
} }
/** /**
* * Class for rule for merging variants
*/ */
abstract static class AlleleMergeRule { abstract static class AlleleMergeRule {
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2): // vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
@ -442,7 +458,7 @@ class PhasingUtils {
} }
/** /**
* Class for sore the alleles with the same haplotype * Class for storing the alleles with the same haplotype
*/ */
static class SameHaplotypeAlleles { static class SameHaplotypeAlleles {
@ -481,7 +497,7 @@ class PhasingUtils {
/** /**
* Get the hah code for alleles 1 and 2 * Get the hah code for alleles 1 and 2
* *
* @return hasb code for alleles 1 and 2 * @return hash code for alleles 1 and 2
*/ */
public int hashCode() { public int hashCode() {
return all1.hashCode() + all2.hashCode(); return all1.hashCode() + all2.hashCode();
@ -540,7 +556,7 @@ class PhasingUtils {
* @param all1 allele 1 * @param all1 allele 1
* @param all2 allele 2 * @param all2 allele 2
* @param creatingReferenceForFirstTime * @param creatingReferenceForFirstTime
* @return * @return merged allele
*/ */
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) { private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2); AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
@ -564,6 +580,7 @@ class PhasingUtils {
} }
/** /**
* Get all merged alleles
* *
* @return set of merged alleles values * @return set of merged alleles values
*/ */

View File

@ -70,21 +70,17 @@ public class PhasingUtilsUnitTest extends BaseTest {
private final int start = 10; private final int start = 10;
private ReferenceSequenceFile referenceFile; private ReferenceSequenceFile referenceFile;
private Allele ref;
private Allele alt;
@BeforeClass @BeforeClass
public void init() throws FileNotFoundException { public void init() throws FileNotFoundException {
referenceFile = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); referenceFile = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
ref = Allele.create("C", true);
alt = Allele.create("T", false);
} }
@Test @Test
public void TestMatchHaplotypeAllelesKeyHP() { public void TestMatchHaplotypeAllelesKeyHP() {
final List<Allele> alleleList = new ArrayList<>(); final List<Allele> alleleList = new ArrayList<>();
alleleList.add(Allele.create("T", true)); alleleList.add(Allele.create("T", false));
alleleList.add(Allele.create("C", false)); alleleList.add(Allele.create("C", false));
String samelpName = "TC"; String samelpName = "TC";
final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make(); final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make();
@ -110,9 +106,6 @@ public class PhasingUtilsUnitTest extends BaseTest {
// Must match because the same genotype // Must match because the same genotype
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT); PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT);
for (int i = 0; i < sameHaplotypeAlleles.hapAlleles.size(); i++)
System.out.println(sameHaplotypeAlleles.hapAlleles.get(i));
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles(); PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false)));
@ -124,10 +117,16 @@ public class PhasingUtilsUnitTest extends BaseTest {
public void TestMergeIntoMNPvalidationCheck() { public void TestMergeIntoMNPvalidationCheck() {
String source = new String("test"); String source = new String("test");
String contig = new String("1"); String contig = new String("1");
final VariantContext variantContextRef = new VariantContextBuilder(source, contig, start, start, Arrays.asList(ref)).make(); final List<Allele> alleleList1 = new ArrayList<>();
final VariantContext variantContextAlt = new VariantContextBuilder(source, contig, start, start, Arrays.asList(alt)).make(); alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("CC", false));
final VariantContext variantContext1 = new VariantContextBuilder(source, contig, start, start, alleleList1).make();
final VariantContext variantContext2 = new VariantContextBuilder(source, contig, start, start, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile); GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContextRef, variantContextAlt)); Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContext1, variantContext1));
} }
} }