Added more tests and documentation

This commit is contained in:
Ron Levine 2014-12-19 12:57:43 -05:00
parent ba949389c5
commit 069398ad46
3 changed files with 142 additions and 40 deletions

View File

@ -74,7 +74,7 @@ class PhasingUtils {
* @param vc1 variant 1
* @param vc2 variant 2
* @param referenceFile sequence file containing the reference genome
* @param alleleMergeRule
* @param alleleMergeRule rule for merging variants
* @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2,
* their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule
*/
@ -98,7 +98,7 @@ class PhasingUtils {
/**
* Find the alleles with the same haplotype
* assumes alleleSegregationIsKnown
* TODO - should alleleSegregationIsKnown be called within the method?
* TODO - should alleleSegregationIsKnown be called within this method?
*
* @param gt1 genotype 1
* @param gt2 genotype 2
@ -160,13 +160,13 @@ class PhasingUtils {
}
/**
* Merge variants into an MNP
*
* @param vc1 variant 1
* @param vc2 variant 2
* @param referenceFile sequence file containing the reference genome
* @return variant with the merged MNP
*/
static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
final int startInter = vc1.getEnd() + 1;
final int endInter = vc2.getStart() - 1;
@ -246,25 +246,30 @@ class PhasingUtils {
}
/**
* Get preset attributes and whether they are in vc1 or vc2
* TODO: Will always return an empty map because MERGE_OR_ATTRIBS is empty
*
* @param vc1
* @param vc2
* @return merged variant attributes
* @param vc1 variant 1
* @param vc2 variant 2
* @return whether the preset attributes are in vc1 or vc2
*/
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
// Map of attribute name to value
Map<String, Object> mergedAttribs = new HashMap<String, Object>();
final List<VariantContext> vcList = new LinkedList<VariantContext>();
vcList.add(vc1);
vcList.add(vc2);
// Attribute of interest
//String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
final String[] MERGE_OR_ATTRIBS = {};
for (String orAttrib : MERGE_OR_ATTRIBS) {
boolean attribVal = false;
for (VariantContext vc : vcList) {
// Does the variant have the attribute?
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
if (attribVal) // already true, so no reason to continue:
if ( attribVal ) // already true, so no reason to continue:
break;
}
mergedAttribs.put(orAttrib, attribVal);
@ -335,6 +340,8 @@ class PhasingUtils {
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1
for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
if ( gt2 == null ) // gt2 does not have sample name
return false;
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
return false;
@ -385,7 +392,6 @@ class PhasingUtils {
* @param vc2 variant 2
* @return true if there is a sample with alternate alleles, false otherwise
*/
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
@ -404,13 +410,12 @@ class PhasingUtils {
}
/**
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference)
*
* @param vc1 variant 1
* @param vc2 variant 2
* @return true if
*/
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
@ -527,12 +532,12 @@ class PhasingUtils {
private int intermediateLength;
/**
* Constructor
*
* @param intermediateBases
* @param intermediateBases array of bases
* @param vc1 variant 1
* @param vc2 variant 2
*/
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
this.intermediateBases = intermediateBases;
@ -545,7 +550,7 @@ class PhasingUtils {
*
* @param all1 allele 1
* @param all2 allele 2
* @return
* @return merged allele
*/
public Allele ensureMergedAllele(Allele all1, Allele all2) {
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor

View File

@ -56,8 +56,8 @@ import htsjdk.variant.variantcontext.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
@ -66,7 +66,7 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class PhasingUtilsUnitTest extends BaseTest {
public class PhasingUtilsUnitTest extends BaseTest {
private final int start = 10;
private ReferenceSequenceFile referenceFile;
@ -79,54 +79,152 @@ public class PhasingUtilsUnitTest extends BaseTest {
@Test
public void TestMatchHaplotypeAllelesKeyHP() {
final List<Allele> alleleList = new ArrayList<>();
alleleList.add(Allele.create("T", false));
alleleList.add(Allele.create("C", false));
String samelpName = "TC";
final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make();
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", false));
alleleList1.add(Allele.create("C", false));
final Genotype genotype1 = new GenotypeBuilder("TC").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList1).make();
// Must match because the same genotype
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT);
// Attributes in different order
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("G", false));
alleleList2.add(Allele.create("A", false));
final Genotype genotype2 = new GenotypeBuilder("GA").attribute("HP", new String[]{"10-2", "10-1"}).alleles(alleleList2).make();
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotype1, genotype2);
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("A", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("G", false)));
sameHaplotypeAllelesAnswer.requiresSwap = true;
Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles);
Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap);
}
@Test
public void TestMatchHaplotypeAllelesNotKeyHP() {
public void TestMatchHaplotypeAllelesNoKeyHP() {
final List<Allele> alleleList = new ArrayList<>();
alleleList.add(Allele.create("T", false));
alleleList.add(Allele.create("C", false));
final Genotype genotypeALT = new GenotypeBuilder("TC").PL(new int[]{0, 100, 1000}).alleles(alleleList).make();
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", false));
alleleList1.add(Allele.create("C", false));
final Genotype genotype1 = new GenotypeBuilder("TC").alleles(alleleList1).make();
// Attributes in different order
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("G", false));
alleleList2.add(Allele.create("A", false));
final Genotype genotype2 = new GenotypeBuilder("GA").alleles(alleleList2).make();
// Must match because the same genotype
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT);
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotype1, genotype2);
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("G", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("A", false)));
Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles);
Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap);
}
@Test
public void TestMergeIntoMNPvalidationCheck() {
String source = new String("test");
String contig = new String("1");
public void TestMergeIntoMNPvalidationTrueCheck() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("CC", false));
final VariantContext variantContext1 = new VariantContextBuilder(source, contig, start, start, alleleList1).make();
final VariantContext variantContext2 = new VariantContextBuilder(source, contig, start, start, alleleList2).make();
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start, start, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start+1, start+1, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContext1, variantContext1));
Assert.assertTrue(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
}
@Test
public void TestMergeIntoMNPvalidationCheckLocBefore() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start+1, start+1, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start, start, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
}
@Test
public void TestMergeIntoMNPvalidationDiffContigs() {
final String contig1 = new String("1");
final String contig2 = new String("2");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig1, start+1, start+1, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig2, start, start, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
}
@Test
public void TestMergeVariantContextNames() {
final String result = new String("A_B");
Assert.assertEquals(result, PhasingUtils.mergeVariantContextNames("A", "B"));
}
@Test
public void TestMergeVariantContextAttributes() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start, start, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start+1, start+1, alleleList2).make();
Assert.assertEquals(0, PhasingUtils.mergeVariantContextAttributes(vc1, vc2).size());
}
@Test
public void TestAllSamplesAreMergeableDiffSampleNames() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final Genotype g1 = new GenotypeBuilder().name("sample1").alleles(alleleList1).make();
final VariantContext vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(g1).make();
Genotype g2 = new GenotypeBuilder().name("sample2").alleles(alleleList2).make();
final VariantContext vc2 = new VariantContextBuilder().chr(contig).id("i21").source("AC").start(start).stop(start).alleles(alleleList2).genotypes(g2).make();
Assert.assertFalse( PhasingUtils.allSamplesAreMergeable(vc1, vc2));
}
@Test
public void TestAlleleSegregationIsKnown(){
}
@Test
public void TestSomeSampleHasDoubleNonReferenceAllele(){
}
@Test
public void TestDoubleAllelesSegregatePerfectlyAmongSamples(){
}
}

View File

@ -96,7 +96,6 @@ public abstract class BaseTest {
public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta";
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta";