Added more tests and documentation
This commit is contained in:
parent
ba949389c5
commit
069398ad46
|
|
@ -74,7 +74,7 @@ class PhasingUtils {
|
|||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @param referenceFile sequence file containing the reference genome
|
||||
* @param alleleMergeRule
|
||||
* @param alleleMergeRule rule for merging variants
|
||||
* @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2,
|
||||
* their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule
|
||||
*/
|
||||
|
|
@ -98,7 +98,7 @@ class PhasingUtils {
|
|||
/**
|
||||
* Find the alleles with the same haplotype
|
||||
* assumes alleleSegregationIsKnown
|
||||
* TODO - should alleleSegregationIsKnown be called within the method?
|
||||
* TODO - should alleleSegregationIsKnown be called within this method?
|
||||
*
|
||||
* @param gt1 genotype 1
|
||||
* @param gt2 genotype 2
|
||||
|
|
@ -160,13 +160,13 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Merge variants into an MNP
|
||||
*
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @param referenceFile sequence file containing the reference genome
|
||||
* @return variant with the merged MNP
|
||||
*/
|
||||
|
||||
static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
|
||||
final int startInter = vc1.getEnd() + 1;
|
||||
final int endInter = vc2.getStart() - 1;
|
||||
|
|
@ -246,25 +246,30 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get preset attributes and whether they are in vc1 or vc2
|
||||
* TODO: Will always return an empty map because MERGE_OR_ATTRIBS is empty
|
||||
*
|
||||
* @param vc1
|
||||
* @param vc2
|
||||
* @return merged variant attributes
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @return whether the preset attributes are in vc1 or vc2
|
||||
*/
|
||||
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
|
||||
// Map of attribute name to value
|
||||
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
|
||||
|
||||
final List<VariantContext> vcList = new LinkedList<VariantContext>();
|
||||
vcList.add(vc1);
|
||||
vcList.add(vc2);
|
||||
|
||||
// Attribute of interest
|
||||
//String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
|
||||
final String[] MERGE_OR_ATTRIBS = {};
|
||||
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
||||
boolean attribVal = false;
|
||||
for (VariantContext vc : vcList) {
|
||||
// Does the variant have the attribute?
|
||||
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
||||
if (attribVal) // already true, so no reason to continue:
|
||||
if ( attribVal ) // already true, so no reason to continue:
|
||||
break;
|
||||
}
|
||||
mergedAttribs.put(orAttrib, attribVal);
|
||||
|
|
@ -335,6 +340,8 @@ class PhasingUtils {
|
|||
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
if ( gt2 == null ) // gt2 does not have sample name
|
||||
return false;
|
||||
|
||||
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
|
||||
return false;
|
||||
|
|
@ -385,7 +392,6 @@ class PhasingUtils {
|
|||
* @param vc2 variant 2
|
||||
* @return true if there is a sample with alternate alleles, false otherwise
|
||||
*/
|
||||
|
||||
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
|
||||
for (final Genotype gt1 : vc1.getGenotypes()) {
|
||||
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
|
||||
|
|
@ -404,13 +410,12 @@ class PhasingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference
|
||||
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference)
|
||||
*
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
* @return true if
|
||||
*/
|
||||
|
||||
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
|
||||
final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
|
||||
final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
|
||||
|
|
@ -527,12 +532,12 @@ class PhasingUtils {
|
|||
private int intermediateLength;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param intermediateBases
|
||||
* @param intermediateBases array of bases
|
||||
* @param vc1 variant 1
|
||||
* @param vc2 variant 2
|
||||
*/
|
||||
|
||||
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
|
||||
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
|
||||
this.intermediateBases = intermediateBases;
|
||||
|
|
@ -545,7 +550,7 @@ class PhasingUtils {
|
|||
*
|
||||
* @param all1 allele 1
|
||||
* @param all2 allele 2
|
||||
* @return
|
||||
* @return merged allele
|
||||
*/
|
||||
public Allele ensureMergedAllele(Allele all1, Allele all2) {
|
||||
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor
|
||||
|
|
|
|||
|
|
@ -56,8 +56,8 @@ import htsjdk.variant.variantcontext.*;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.BaseTest;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -66,7 +66,7 @@ import org.testng.annotations.BeforeClass;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
public class PhasingUtilsUnitTest extends BaseTest {
|
||||
public class PhasingUtilsUnitTest extends BaseTest {
|
||||
|
||||
private final int start = 10;
|
||||
private ReferenceSequenceFile referenceFile;
|
||||
|
|
@ -79,54 +79,152 @@ public class PhasingUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void TestMatchHaplotypeAllelesKeyHP() {
|
||||
|
||||
final List<Allele> alleleList = new ArrayList<>();
|
||||
alleleList.add(Allele.create("T", false));
|
||||
alleleList.add(Allele.create("C", false));
|
||||
String samelpName = "TC";
|
||||
final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make();
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", false));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final Genotype genotype1 = new GenotypeBuilder("TC").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList1).make();
|
||||
|
||||
// Must match because the same genotype
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT);
|
||||
// Attributes in different order
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("G", false));
|
||||
alleleList2.add(Allele.create("A", false));
|
||||
final Genotype genotype2 = new GenotypeBuilder("GA").attribute("HP", new String[]{"10-2", "10-1"}).alleles(alleleList2).make();
|
||||
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotype1, genotype2);
|
||||
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("A", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("G", false)));
|
||||
sameHaplotypeAllelesAnswer.requiresSwap = true;
|
||||
|
||||
Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles);
|
||||
Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMatchHaplotypeAllelesNotKeyHP() {
|
||||
public void TestMatchHaplotypeAllelesNoKeyHP() {
|
||||
|
||||
final List<Allele> alleleList = new ArrayList<>();
|
||||
alleleList.add(Allele.create("T", false));
|
||||
alleleList.add(Allele.create("C", false));
|
||||
final Genotype genotypeALT = new GenotypeBuilder("TC").PL(new int[]{0, 100, 1000}).alleles(alleleList).make();
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", false));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final Genotype genotype1 = new GenotypeBuilder("TC").alleles(alleleList1).make();
|
||||
|
||||
// Attributes in different order
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("G", false));
|
||||
alleleList2.add(Allele.create("A", false));
|
||||
final Genotype genotype2 = new GenotypeBuilder("GA").alleles(alleleList2).make();
|
||||
|
||||
// Must match because the same genotype
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT);
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotype1, genotype2);
|
||||
|
||||
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("G", false)));
|
||||
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("A", false)));
|
||||
Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles);
|
||||
Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMergeIntoMNPvalidationCheck() {
|
||||
String source = new String("test");
|
||||
String contig = new String("1");
|
||||
public void TestMergeIntoMNPvalidationTrueCheck() {
|
||||
final String contig = new String("1");
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", true));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("A", true));
|
||||
alleleList2.add(Allele.create("CC", false));
|
||||
final VariantContext variantContext1 = new VariantContextBuilder(source, contig, start, start, alleleList1).make();
|
||||
final VariantContext variantContext2 = new VariantContextBuilder(source, contig, start, start, alleleList2).make();
|
||||
alleleList2.add(Allele.create("C", false));
|
||||
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start, start, alleleList1).make();
|
||||
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start+1, start+1, alleleList2).make();
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
|
||||
|
||||
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContext1, variantContext1));
|
||||
Assert.assertTrue(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMergeIntoMNPvalidationCheckLocBefore() {
|
||||
final String contig = new String("1");
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", true));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("A", true));
|
||||
alleleList2.add(Allele.create("C", false));
|
||||
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start+1, start+1, alleleList1).make();
|
||||
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start, start, alleleList2).make();
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
|
||||
|
||||
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMergeIntoMNPvalidationDiffContigs() {
|
||||
final String contig1 = new String("1");
|
||||
final String contig2 = new String("2");
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", true));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("A", true));
|
||||
alleleList2.add(Allele.create("C", false));
|
||||
final VariantContext vc1 = new VariantContextBuilder("TC", contig1, start+1, start+1, alleleList1).make();
|
||||
final VariantContext vc2 = new VariantContextBuilder("AC", contig2, start, start, alleleList2).make();
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
|
||||
|
||||
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMergeVariantContextNames() {
|
||||
final String result = new String("A_B");
|
||||
Assert.assertEquals(result, PhasingUtils.mergeVariantContextNames("A", "B"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestMergeVariantContextAttributes() {
|
||||
final String contig = new String("1");
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", true));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("A", true));
|
||||
alleleList2.add(Allele.create("C", false));
|
||||
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start, start, alleleList1).make();
|
||||
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start+1, start+1, alleleList2).make();
|
||||
|
||||
Assert.assertEquals(0, PhasingUtils.mergeVariantContextAttributes(vc1, vc2).size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestAllSamplesAreMergeableDiffSampleNames() {
|
||||
final String contig = new String("1");
|
||||
final List<Allele> alleleList1 = new ArrayList<>();
|
||||
alleleList1.add(Allele.create("T", true));
|
||||
alleleList1.add(Allele.create("C", false));
|
||||
final List<Allele> alleleList2 = new ArrayList<>();
|
||||
alleleList2.add(Allele.create("A", true));
|
||||
alleleList2.add(Allele.create("C", false));
|
||||
|
||||
final Genotype g1 = new GenotypeBuilder().name("sample1").alleles(alleleList1).make();
|
||||
final VariantContext vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(g1).make();
|
||||
Genotype g2 = new GenotypeBuilder().name("sample2").alleles(alleleList2).make();
|
||||
final VariantContext vc2 = new VariantContextBuilder().chr(contig).id("i21").source("AC").start(start).stop(start).alleles(alleleList2).genotypes(g2).make();
|
||||
|
||||
Assert.assertFalse( PhasingUtils.allSamplesAreMergeable(vc1, vc2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestAlleleSegregationIsKnown(){
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestSomeSampleHasDoubleNonReferenceAllele(){
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestDoubleAllelesSegregatePerfectlyAmongSamples(){
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,7 +96,6 @@ public abstract class BaseTest {
|
|||
public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
|
||||
public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
|
||||
public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta";
|
||||
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
|
||||
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
|
||||
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
|
||||
public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta";
|
||||
|
|
|
|||
Loading…
Reference in New Issue