Added more tests and documentation

This commit is contained in:
Ron Levine 2014-12-19 12:57:43 -05:00
parent ba949389c5
commit 069398ad46
3 changed files with 142 additions and 40 deletions

View File

@ -74,7 +74,7 @@ class PhasingUtils {
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
* @param referenceFile sequence file containing the reference genome * @param referenceFile sequence file containing the reference genome
* @param alleleMergeRule * @param alleleMergeRule rule for merging variants
* @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2, * @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2,
* their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule * their genotypes do NOT have the same number of chromosomes, haplotype, number of attributes as chromosomes, are both hetrozygous or do not abide by the merge rule
*/ */
@ -98,7 +98,7 @@ class PhasingUtils {
/** /**
* Find the alleles with the same haplotype * Find the alleles with the same haplotype
* assumes alleleSegregationIsKnown * assumes alleleSegregationIsKnown
* TODO - should alleleSegregationIsKnown be called within the method? * TODO - should alleleSegregationIsKnown be called within this method?
* *
* @param gt1 genotype 1 * @param gt1 genotype 1
* @param gt2 genotype 2 * @param gt2 genotype 2
@ -160,13 +160,13 @@ class PhasingUtils {
} }
/** /**
* Merge variants into an MNP
* *
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
* @param referenceFile sequence file containing the reference genome * @param referenceFile sequence file containing the reference genome
* @return variant with the merged MNP * @return variant with the merged MNP
*/ */
static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) { static VariantContext reallyMergeIntoMNP(VariantContext vc1, VariantContext vc2, ReferenceSequenceFile referenceFile) {
final int startInter = vc1.getEnd() + 1; final int startInter = vc1.getEnd() + 1;
final int endInter = vc2.getStart() - 1; final int endInter = vc2.getStart() - 1;
@ -246,23 +246,28 @@ class PhasingUtils {
} }
/** /**
* Get preset attributes and whether they are in vc1 or vc2
* TODO: Will always return an empty map because MERGE_OR_ATTRIBS is empty
* *
* @param vc1 * @param vc1 variant 1
* @param vc2 * @param vc2 variant 2
* @return merged variant attributes * @return whether the preset attributes are in vc1 or vc2
*/ */
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) { static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
// Map of attribute name to value
Map<String, Object> mergedAttribs = new HashMap<String, Object>(); Map<String, Object> mergedAttribs = new HashMap<String, Object>();
final List<VariantContext> vcList = new LinkedList<VariantContext>(); final List<VariantContext> vcList = new LinkedList<VariantContext>();
vcList.add(vc1); vcList.add(vc1);
vcList.add(vc2); vcList.add(vc2);
// Attribute of interest
//String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY}; //String[] MERGE_OR_ATTRIBS = {VCFConstants.DBSNP_KEY};
final String[] MERGE_OR_ATTRIBS = {}; final String[] MERGE_OR_ATTRIBS = {};
for (String orAttrib : MERGE_OR_ATTRIBS) { for (String orAttrib : MERGE_OR_ATTRIBS) {
boolean attribVal = false; boolean attribVal = false;
for (VariantContext vc : vcList) { for (VariantContext vc : vcList) {
// Does the variant have the attribute?
attribVal = vc.getAttributeAsBoolean(orAttrib, false); attribVal = vc.getAttributeAsBoolean(orAttrib, false);
if ( attribVal ) // already true, so no reason to continue: if ( attribVal ) // already true, so no reason to continue:
break; break;
@ -335,6 +340,8 @@ class PhasingUtils {
// Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1 // Check that each sample's genotype in vc2 is uniquely appendable onto its genotype in vc1
for (final Genotype gt1 : vc1.getGenotypes()) { for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
if ( gt2 == null ) // gt2 does not have sample name
return false;
if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom if (!alleleSegregationIsKnown(gt1, gt2)) // can merge if: phased, or if either is a hom
return false; return false;
@ -385,7 +392,6 @@ class PhasingUtils {
* @param vc2 variant 2 * @param vc2 variant 2
* @return true if there is a sample with alternate alleles, false otherwise * @return true if there is a sample with alternate alleles, false otherwise
*/ */
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) { static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
for (final Genotype gt1 : vc1.getGenotypes()) { for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName()); final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
@ -404,13 +410,12 @@ class PhasingUtils {
} }
/** /**
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference * Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference)
* *
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
* @return true if * @return true if
*/ */
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) { static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>(); final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>(); final Map<Allele, Allele> allele2ToAllele1 = new HashMap<Allele, Allele>();
@ -527,12 +532,12 @@ class PhasingUtils {
private int intermediateLength; private int intermediateLength;
/** /**
* Constructor
* *
* @param intermediateBases * @param intermediateBases array of bases
* @param vc1 variant 1 * @param vc1 variant 1
* @param vc2 variant 2 * @param vc2 variant 2
*/ */
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) { public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
this.intermediateBases = intermediateBases; this.intermediateBases = intermediateBases;
@ -545,7 +550,7 @@ class PhasingUtils {
* *
* @param all1 allele 1 * @param all1 allele 1
* @param all2 allele 2 * @param all2 allele 2
* @return * @return merged allele
*/ */
public Allele ensureMergedAllele(Allele all1, Allele all2) { public Allele ensureMergedAllele(Allele all1, Allele all2) {
return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor return ensureMergedAllele(all1, all2, false); // false <-> since even if all1+all2 = reference, it was already created in the constructor

View File

@ -56,8 +56,8 @@ import htsjdk.variant.variantcontext.*;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
@ -79,54 +79,152 @@ public class PhasingUtilsUnitTest extends BaseTest {
@Test @Test
public void TestMatchHaplotypeAllelesKeyHP() { public void TestMatchHaplotypeAllelesKeyHP() {
final List<Allele> alleleList = new ArrayList<>(); final List<Allele> alleleList1 = new ArrayList<>();
alleleList.add(Allele.create("T", false)); alleleList1.add(Allele.create("T", false));
alleleList.add(Allele.create("C", false)); alleleList1.add(Allele.create("C", false));
String samelpName = "TC"; final Genotype genotype1 = new GenotypeBuilder("TC").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList1).make();
final Genotype genotypeALT = new GenotypeBuilder(samelpName).attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).make();
// Must match because the same genotype // Attributes in different order
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT); final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("G", false));
alleleList2.add(Allele.create("A", false));
final Genotype genotype2 = new GenotypeBuilder("GA").attribute("HP", new String[]{"10-2", "10-1"}).alleles(alleleList2).make();
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotype1, genotype2);
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles(); PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("A", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("G", false)));
sameHaplotypeAllelesAnswer.requiresSwap = true;
Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles); Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles);
Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap); Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap);
} }
@Test @Test
public void TestMatchHaplotypeAllelesNotKeyHP() { public void TestMatchHaplotypeAllelesNoKeyHP() {
final List<Allele> alleleList = new ArrayList<>(); final List<Allele> alleleList1 = new ArrayList<>();
alleleList.add(Allele.create("T", false)); alleleList1.add(Allele.create("T", false));
alleleList.add(Allele.create("C", false)); alleleList1.add(Allele.create("C", false));
final Genotype genotypeALT = new GenotypeBuilder("TC").PL(new int[]{0, 100, 1000}).alleles(alleleList).make(); final Genotype genotype1 = new GenotypeBuilder("TC").alleles(alleleList1).make();
// Attributes in different order
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("G", false));
alleleList2.add(Allele.create("A", false));
final Genotype genotype2 = new GenotypeBuilder("GA").alleles(alleleList2).make();
// Must match because the same genotype // Must match because the same genotype
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotypeALT, genotypeALT); PhasingUtils.SameHaplotypeAlleles sameHaplotypeAlleles = PhasingUtils.matchHaplotypeAlleles(genotype1, genotype2);
PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles(); PhasingUtils.SameHaplotypeAlleles sameHaplotypeAllelesAnswer = new PhasingUtils.SameHaplotypeAlleles();
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("T", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("T", false), Allele.create("G", false)));
sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("C", false))); sameHaplotypeAllelesAnswer.hapAlleles.add(new PhasingUtils.AlleleOneAndTwo(Allele.create("C", false), Allele.create("A", false)));
Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles); Assert.assertEquals(sameHaplotypeAlleles.hapAlleles, sameHaplotypeAllelesAnswer.hapAlleles);
Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap); Assert.assertEquals(sameHaplotypeAlleles.requiresSwap, sameHaplotypeAllelesAnswer.requiresSwap);
} }
@Test @Test
public void TestMergeIntoMNPvalidationCheck() { public void TestMergeIntoMNPvalidationTrueCheck() {
String source = new String("test"); final String contig = new String("1");
String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>(); final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true)); alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false)); alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>(); final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true)); alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("CC", false)); alleleList2.add(Allele.create("C", false));
final VariantContext variantContext1 = new VariantContextBuilder(source, contig, start, start, alleleList1).make(); final VariantContext vc1 = new VariantContextBuilder("TC", contig, start, start, alleleList1).make();
final VariantContext variantContext2 = new VariantContextBuilder(source, contig, start, start, alleleList2).make(); final VariantContext vc2 = new VariantContextBuilder("AC", contig, start+1, start+1, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile); GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, variantContext1, variantContext1)); Assert.assertTrue(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
}
@Test
public void TestMergeIntoMNPvalidationCheckLocBefore() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start+1, start+1, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start, start, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
}
@Test
public void TestMergeIntoMNPvalidationDiffContigs() {
final String contig1 = new String("1");
final String contig2 = new String("2");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig1, start+1, start+1, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig2, start, start, alleleList2).make();
GenomeLocParser genomeLocParser = new GenomeLocParser(referenceFile);
Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc2));
}
@Test
public void TestMergeVariantContextNames() {
final String result = new String("A_B");
Assert.assertEquals(result, PhasingUtils.mergeVariantContextNames("A", "B"));
}
@Test
public void TestMergeVariantContextAttributes() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final VariantContext vc1 = new VariantContextBuilder("TC", contig, start, start, alleleList1).make();
final VariantContext vc2 = new VariantContextBuilder("AC", contig, start+1, start+1, alleleList2).make();
Assert.assertEquals(0, PhasingUtils.mergeVariantContextAttributes(vc1, vc2).size());
}
@Test
public void TestAllSamplesAreMergeableDiffSampleNames() {
final String contig = new String("1");
final List<Allele> alleleList1 = new ArrayList<>();
alleleList1.add(Allele.create("T", true));
alleleList1.add(Allele.create("C", false));
final List<Allele> alleleList2 = new ArrayList<>();
alleleList2.add(Allele.create("A", true));
alleleList2.add(Allele.create("C", false));
final Genotype g1 = new GenotypeBuilder().name("sample1").alleles(alleleList1).make();
final VariantContext vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(g1).make();
Genotype g2 = new GenotypeBuilder().name("sample2").alleles(alleleList2).make();
final VariantContext vc2 = new VariantContextBuilder().chr(contig).id("i21").source("AC").start(start).stop(start).alleles(alleleList2).genotypes(g2).make();
Assert.assertFalse( PhasingUtils.allSamplesAreMergeable(vc1, vc2));
}
@Test
public void TestAlleleSegregationIsKnown(){
}
@Test
public void TestSomeSampleHasDoubleNonReferenceAllele(){
}
@Test
public void TestDoubleAllelesSegregatePerfectlyAmongSamples(){
} }
} }

View File

@ -96,7 +96,6 @@ public abstract class BaseTest {
public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"; public static final String hg18Reference = "/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta";
public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta"; public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta"; public static final String b36KGReference = "/humgen/1kg/reference/human_b36_both.fasta";
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta"; public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta"; public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta"; public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta";