matchHaplotypeAlleles() no longer calls alleleSegregationIsKnown(), added a TODO to investigate
This commit is contained in:
parent
56f8e4f9cf
commit
ba949389c5
|
|
@ -63,14 +63,28 @@ import htsjdk.variant.variantcontext.*;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Utility class for phasing analyis
|
||||
* Utility class for phasing analysis
|
||||
*/
|
||||
class PhasingUtils {
|
||||
|
||||
/**
|
||||
* Merge variants into an MNP
|
||||
*
|
||||
* @param genomeLocParser parse the genome locations
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @param referenceFile sequence file containing the reference genome
|
||||
* @param alleleMergeRule
|
||||
* @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2,
|
||||
* their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule
|
||||
*/
|
||||
static VariantContext mergeIntoMNP(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile, AlleleMergeRule alleleMergeRule) {
|
||||
|
||||
// Check if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2
|
||||
if (!mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2))
|
||||
return null;
|
||||
|
||||
// Check that it's logically possible to merge the VCs:
|
||||
// Check if variant genotypes have the same number of chromosomes, haplotype, number of attributes as chromosomes, and either genotype is homozygous
|
||||
if (!allSamplesAreMergeable(vc1, vc2))
|
||||
return null;
|
||||
|
||||
|
|
@ -83,6 +97,8 @@ class PhasingUtils {
|
|||
|
||||
/**
|
||||
* Find the alleles with the same haplotype
|
||||
* assumes alleleSegregationIsKnown
|
||||
* TODO - should alleleSegregationIsKnown be called within the method?
|
||||
*
|
||||
* @param gt1 genotype 1
|
||||
* @param gt2 genotype 2
|
||||
|
|
@ -92,11 +108,6 @@ class PhasingUtils {
|
|||
|
||||
final SameHaplotypeAlleles hapAlleles = new SameHaplotypeAlleles();
|
||||
|
||||
// Skip if can not merge the haplotype alleles
|
||||
if ( !alleleSegregationIsKnown(gt1, gt2) ){
|
||||
return hapAlleles;
|
||||
}
|
||||
|
||||
// Get the alleles
|
||||
final int numAlleles = gt1.getPloidy();
|
||||
final Allele[] site1AllelesArray = gt1.getAlleles().toArray(new Allele[numAlleles]);
|
||||
|
|
@ -238,7 +249,7 @@ class PhasingUtils {
|
|||
*
|
||||
* @param vc1
|
||||
* @param vc2
|
||||
* @return merged variant atrtributes
|
||||
* @return merged variant attributes
|
||||
*/
|
||||
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
|
||||
|
|
@ -263,12 +274,13 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check if variants can be moved into the MNP
|
||||
* Check if variants can be merged into the MNP
|
||||
*
|
||||
* @param genomeLocParser parse the genome locations
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @return true if variants are an SNP or MNP, on the same contig, location 1 is not before location 2, unfiltered, from the same sample set and are called,
|
||||
* @return true if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2, unfiltered, from the same sample set and are called,
|
||||
* false otherwise
|
||||
*/
|
||||
static boolean mergeIntoMNPvalidationCheck(GenomeLocParser genomeLocParser, VariantContext vc1, VariantContext vc2) {
|
||||
// Can only merge "simple" base strings (i.e., SNPs or MNPs, but not indels):
|
||||
|
|
@ -313,14 +325,14 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check if can merge merge variants if phased or either is a homozygous
|
||||
* Check if can merge variants
|
||||
*
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @return true if variants are phased or either is a homozygous, false otherwise
|
||||
*/
|
||||
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1:
|
||||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
|
|
@ -348,7 +360,7 @@ class PhasingUtils {
|
|||
if (gt1.isHom() || gt2.isHom())
|
||||
return true;
|
||||
|
||||
// If gt1 or gt2 do not read backed phasing haplotype, then can not be merged
|
||||
// If gt1 or gt2 do not have a read backed phasing haplotype, then can not be merged
|
||||
if (!gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY) || !gt2.hasAnyAttribute(ReadBackedPhasing.HP_KEY))
|
||||
return false;
|
||||
|
||||
|
|
@ -367,19 +379,23 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check of any samples have alternate alleles
|
||||
*
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @return true if
|
||||
* @return true if there is a sample with alternate alleles, false otherwise
|
||||
*/
|
||||
|
||||
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
||||
// Find the alleles with the same haplotype
|
||||
final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2);
|
||||
|
||||
// Find corresponding alternate alleles
|
||||
for (AlleleOneAndTwo all1all2 : hapAlleles.hapAlleles) {
|
||||
if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference()) // corresponding alleles are alternate
|
||||
if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -388,6 +404,7 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference
|
||||
*
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
|
|
@ -395,7 +412,6 @@ class PhasingUtils {
|
|||
*/
|
||||
|
||||
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
||||
// Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference):
|
||||
final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
||||
final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
||||
|
||||
|
|
@ -430,7 +446,7 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Class for rule for merging variants
|
||||
*/
|
||||
abstract static class AlleleMergeRule {
|
||||
// vc1, vc2 are ONLY passed to allelesShouldBeMerged() if mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2) AND allSamplesAreMergeable(vc1, vc2):
|
||||
|
|
@ -442,7 +458,7 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Class for sore the alleles with the same haplotype
|
||||
* Class for storing the alleles with the same haplotype
|
||||
*/
|
||||
static class SameHaplotypeAlleles {
|
||||
|
||||
|
|
@ -481,7 +497,7 @@ class PhasingUtils {
|
|||
/**
|
||||
* Get the hah code for alleles 1 and 2
|
||||
*
|
||||
* @return hasb code for alleles 1 and 2
|
||||
* @return hash code for alleles 1 and 2
|
||||
*/
|
||||
public int hashCode() {
|
||||
return all1.hashCode() + all2.hashCode();
|
||||
|
|
@ -540,7 +556,7 @@ class PhasingUtils {
|
|||
* @param all1 allele 1
|
||||
* @param all2 allele 2
|
||||
* @param creatingReferenceForFirstTime
|
||||
* @return
|
||||
* @return merged allele
|
||||
*/
|
||||
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean creatingReferenceForFirstTime) {
|
||||
AlleleOneAndTwo all12 = new AlleleOneAndTwo(all1, all2);
|
||||
|
|
@ -564,6 +580,7 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get all merged alleles
|
||||
*
|
||||
* @return set of merged alleles values
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -70,21 +70,17 @@ public class PhasingUtilsUnitTest extends BaseTest {
|
|||
|
||||
private final int start = 10;
|
||||
private ReferenceSequenceFile referenceFile;
|
||||
private Allele ref;
|
||||
private Allele alt;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
referenceFile = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
ref = Allele.create("C", true);
|
||||
alt = Allele.create("T", false);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMatchHaplotypeAllelesKeyHP() {
|
||||
|
||||
final List<Allele> alleleList = new ArrayList<>();
|
||||
alleleList.add(Allele.create("T", true));
|
||||
alleleList.add(Allele.create("T", false));
|
||||
alleleList.add(Allele.create("C", false));
|
||||
String samelpName = "TC";
|
||||
final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make();
|
||||
|
|
@ -110,9 +106,6 @@ public class PhasingUtilsUnitTest extends BaseTest {
|
|||
// Must match because the same genotype
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT);
|
||||
|
||||
for (int i = 0; i < sameHaplotypeAlleles.hapAlleles.size(); i++)
|
||||
System.out.println(sameHaplotypeAlleles.hapAlleles.get(i));
|
||||
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false)));
|
||||
|
|
@ -124,10 +117,16 @@ public class PhasingUtilsUnitTest extends BaseTest {
|
|||
public void TestMergeIntoMNPvalidationCheck() {
|
||||
String source = new String("test");
|
||||
String contig = new String("1");
|
||||
final VariantContext variantContextRef = new VariantContextBuilder(source, contig, start, start, Arrays.asList(ref)).make();
|
||||
final VariantContext variantContextAlt = new VariantContextBuilder(source, contig, start, start, Arrays.asList(alt)).make();
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", true));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("A", true));
|
||||
alleleList2.add(Allele.create("CC", false));
|
||||
final VariantContext variantContext1 = new VariantContextBuilder(source, contig, start, start, alleleList1).make();
|
||||
final VariantContext variantContext2 = new VariantContextBuilder(source, contig, start, start, alleleList2).make();
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
|
||||
|
||||
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContextRef, variantContextAlt));
|
||||
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContext1, variantContext1));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue