Add TestMergeIntoMNP() and TestReallyMergeIntoMNP()

This commit is contained in:
Ron Levine 2015-01-01 09:51:20 -05:00
parent bb94833750
commit 85dc703461
2 changed files with 107 additions and 43 deletions

View File

@ -71,8 +71,8 @@ class PhasingUtils {
* Merge variants into an MNP
*
* @param genomeLocParser parse the genome locations
* @param vc1 variant 1
* @param vc2 variant 2
* @param vc1 variant context 1
* @param vc2 variant context 2
* @param referenceFile sequence file containing the reference genome
* @param alleleMergeRule rule for merging variants
* @return merged variant or null if the variants are NOT an SNP or MNP, on the same contig, variant location 1 is the same or after the variant location 2,
@ -162,8 +162,8 @@ class PhasingUtils {
/**
* Merge variants into an MNP
*
* @param vc1 variant 1
* @param vc2 variant 2
* @param vc1 variant context 1
* @param vc2 variant context 2
* @param referenceFile sequence file containing the reference genome
* @return variant with the merged MNP
*/
@ -171,16 +171,21 @@ class PhasingUtils {
final int startInter = vc1.getEnd() + 1;
final int endInter = vc2.getStart() - 1;
byte[] intermediateBases = null;
// get bases between vc1 and vc2 in the reference sequence
if (startInter <= endInter) {
intermediateBases = referenceFile.getSubsequenceAt(vc1.getChr(), startInter, endInter).getBases();
StringUtil.toUpperCase(intermediateBases);
}
// merge the reference bases with vc1 and vc2
final MergedAllelesData mergeData = new MergedAllelesData(intermediateBases, vc1, vc2); // ensures that the reference allele is added
final GenotypesContext mergedGenotypes = GenotypesContext.create();
for (final Genotype gt1 : vc1.getGenotypes()) {
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
// Alleles with the same haplotype
final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2);
boolean isPhased = gt1.isPhased() && gt2.isPhased();
@ -197,6 +202,7 @@ class PhasingUtils {
final Map<String, Object> mergedGtAttribs = new HashMap<String, Object>();
// get the min read backed phasing quality
double PQ = Double.MAX_VALUE;
if (gt1.hasAnyAttribute(ReadBackedPhasing.PQ_KEY)) {
PQ = Math.min(PQ, (double) gt1.getAnyAttribute(ReadBackedPhasing.PQ_KEY));
@ -207,6 +213,7 @@ class PhasingUtils {
if (PQ != Double.MAX_VALUE)
mergedGtAttribs.put(ReadBackedPhasing.PQ_KEY, PQ);
// get the read backed phasing phasing haplotype identifier
if (gt1.hasAnyAttribute(ReadBackedPhasing.HP_KEY)) {
mergedGtAttribs.put(ReadBackedPhasing.HP_KEY, gt1.getAnyAttribute(ReadBackedPhasing.HP_KEY));
}
@ -214,21 +221,24 @@ class PhasingUtils {
mergedGtAttribs.put(ReadBackedPhasing.HP_KEY, gt2.getAnyAttribute(ReadBackedPhasing.HP_KEY));
}
// make the merged genotype
final Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).log10PError(mergedGQ).attributes(mergedGtAttribs).phased(isPhased).make();
mergedGenotypes.add(mergedGt);
}
// get the merged name
final String mergedName = mergeVariantContextNames(vc1.getSource(), vc2.getSource());
final double mergedLog10PError = Math.min(vc1.getLog10PError(), vc2.getLog10PError());
final Set<String> mergedFilters = new HashSet<String>(); // Since vc1 and vc2 were unfiltered, the merged record remains unfiltered
final Map<String, Object> mergedAttribs = mergeVariantContextAttributes(vc1, vc2);
// ids
// get the merged ID
final List<String> mergedIDs = new ArrayList<String>();
if ( vc1.hasID() ) mergedIDs.add(vc1.getID());
if ( vc2.hasID() ) mergedIDs.add(vc2.getID());
final String mergedID = mergedIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(VCFConstants.ID_FIELD_SEPARATOR, mergedIDs);
// make the merged variant context
final VariantContextBuilder mergedBuilder = new VariantContextBuilder(mergedName, vc1.getChr(), vc1.getStart(), vc2.getEnd(), mergeData.getAllMergedAlleles()).id(mergedID).genotypes(mergedGenotypes).log10PError(mergedLog10PError).filters(mergedFilters).attributes(mergedAttribs);
VariantContextUtils.calculateChromosomeCounts(mergedBuilder, true);
return mergedBuilder.make();
@ -237,8 +247,8 @@ class PhasingUtils {
/**
* Merge two variant context names together with an underscore betwwen them
*
* @param name1 variant 1 name
* @param name2 variant 2 name
* @param name1 variant context 1 name
* @param name2 variant context 2 name
* @return combined variant names
*/
static String mergeVariantContextNames(String name1, String name2) {
@ -249,8 +259,8 @@ class PhasingUtils {
* Get preset attributes and whether they are in vc1 or vc2
* TODO: Will always return an empty map because MERGE_OR_ATTRIBS is empty
*
* @param vc1 variant 1
* @param vc2 variant 2
* @param vc1 variant context 1
* @param vc2 variant context 2
* @return whether the preset attributes are in vc1 or vc2
*/
static Map<String, Object> mergeVariantContextAttributes(VariantContext vc1, VariantContext vc2) {
@ -282,8 +292,8 @@ class PhasingUtils {
* Check if variants can be merged into the MNP
*
* @param genomeLocParser parse the genome locations
* @param vc1 variant 1
* @param vc2 variant 2
* @param vc1 variant context 1
* @param vc2 variant context 2
* @return true if variants are an SNP or MNP, on the same contig, variant location 1 is not before variant location 2, unfiltered, from the same sample set and are called,
* false otherwise
*/
@ -301,7 +311,7 @@ class PhasingUtils {
if (!loc1.onSameContig(loc2))
return false;
// Variant 1 location must not be before variant 2
// Variant 1 location must not be before variant context 2
if (!loc1.isBefore(loc2))
return false;
@ -332,8 +342,8 @@ class PhasingUtils {
/**
* Check if can merge variants
*
* @param vc1 variant 1
* @param vc2 variant 2
* @param vc1 variant context 1
* @param vc2 variant context 2
* @return true if variants are phased or either is a homozygous, false otherwise
*/
static boolean allSamplesAreMergeable(VariantContext vc1, VariantContext vc2) {
@ -386,23 +396,26 @@ class PhasingUtils {
}
/**
* Check of any samples have alternate alleles
* Check if some samples have double alternate alleles
*
* @param vc1 variant 1
* @param vc2 variant 2
* @return true if there is a sample with alternate alleles, false otherwise
* @param vc1 variant context 1
* @param vc2 variant context 2
* @return true if there is a sample with double alternate alleles, false otherwise
*/
static boolean someSampleHasDoubleNonReferenceAllele(VariantContext vc1, VariantContext vc2) {
for (final Genotype gt1 : vc1.getGenotypes()) {
// gt2 from the same sample as gt1
final Genotype gt2 = vc2.getGenotype(gt1.getSampleName());
// Find the alleles with the same haplotype
final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2);
if ( gt2 != null ) {
// Find the alleles with the same haplotype
final SameHaplotypeAlleles hapAlleles = matchHaplotypeAlleles(gt1, gt2);
// Find corresponding alternate alleles
for (AlleleOneAndTwo all1all2 : hapAlleles.hapAlleles) {
if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference())
return true;
// Find corresponding alternate alleles
for (AlleleOneAndTwo all1all2 : hapAlleles.hapAlleles) {
if (all1all2.all1.isNonReference() && all1all2.all2.isNonReference())
return true;
}
}
}
@ -412,9 +425,9 @@ class PhasingUtils {
/**
* Check that Alleles at vc1 and at vc2 always segregate together in all samples (including reference)
*
* @param vc1 variant 1
* @param vc2 variant 2
* @return true if
* @param vc1 variant context 1
* @param vc2 variant context 2
* @return true if alleles segregate together, false otherwise
*/
static boolean doubleAllelesSegregatePerfectlyAmongSamples(VariantContext vc1, VariantContext vc2) {
final Map<Allele, Allele> allele1ToAllele2 = new HashMap<Allele, Allele>();
@ -535,18 +548,20 @@ class PhasingUtils {
* Constructor
*
* @param intermediateBases array of bases
* @param vc1 variant 1
* @param vc2 variant 2
* @param vc1 variant context 1
* @param vc2 variant context 2
*/
public MergedAllelesData(byte[] intermediateBases, VariantContext vc1, VariantContext vc2) {
this.mergedAlleles = new HashMap<AlleleOneAndTwo, Allele>(); // implemented equals() and hashCode() for AlleleOneAndTwo
this.intermediateBases = intermediateBases;
this.intermediateLength = this.intermediateBases != null ? this.intermediateBases.length : 0;
// merge the reference bases from vc1 before and vc2 after the reference (intermediate) bases
this.ensureMergedAllele(vc1.getReference(), vc2.getReference(), true);
}
/**
* Ensure that the alleles are merged. The merged allele is alternate.
*
* @param all1 allele 1
* @param all2 allele 2
@ -558,11 +573,11 @@ class PhasingUtils {
/**
* Ensure that the alleles are merged.
* Put the bases from all1 before and all2 after the previously merged bases
* Put the bases from all1 before and all2 after the intermediate bases
*
* @param all1 allele 1
* @param all2 allele 2
* @param isRef is this a reference allele
* @param isRef if true, merged allele is reference, if false, merged allele is alternate
* @return merged allele
*/
private Allele ensureMergedAllele(Allele all1, Allele all2, boolean isRef) {

View File

@ -56,6 +56,7 @@ import htsjdk.variant.variantcontext.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
@ -65,7 +66,6 @@ import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
class AlwaysTrueMergeRule extends PhasingUtils.AlleleMergeRule {
@ -93,8 +93,8 @@ public class PhasingUtilsUnitTest extends BaseTest {
genomeLocParser = new GenomeLocParser(referenceFile);
alleleList1 = Arrays.asList(Allele.create("T", true), Allele.create("C", false));
alleleList2 = Arrays.asList(Allele.create("G", true), Allele.create("A", false));
genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList1).make();
genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).alleles(alleleList2).make();
genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).attribute("PQ", 100.0).alleles(alleleList1).make();
genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).make();
contig = new String("1");
vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype1).make();
vc2 = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype2).make();
@ -187,13 +187,22 @@ public class PhasingUtilsUnitTest extends BaseTest {
}
@Test
public void TestSomeSampleHasDoubleNonReferenceAllele(){
public void TestSomeSampleHasDoubleNonReferenceAlleleTrue(){
Genotype genotype = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList2).make();
VariantContext vc = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype).make();
Assert.assertTrue(PhasingUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc));
}
@Test
public void TestSomeSampleHasDoubleNonReferenceAlleleFalse(){
Assert.assertFalse(PhasingUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc2));
}
@Test
public void TestDoubleAllelesSegregatePerfectlyAmongSamples(){
Assert.assertFalse(PhasingUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc2));
Genotype genotype = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList2).make();
VariantContext vc = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype).make();
Assert.assertTrue(PhasingUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc));
}
@Test
@ -207,18 +216,58 @@ public class PhasingUtilsUnitTest extends BaseTest {
AlwaysTrueMergeRule alleleMergeRule = new AlwaysTrueMergeRule();
VariantContext vc = PhasingUtils.mergeIntoMNP(genomeLocParser, vc1, vc2, referenceFile, alleleMergeRule);
Map<String,Object> attributes = new HashMap<String,Object>();
attributes.put("AC", new String[]{"1", "1"});
attributes.put("AF", new String[]{"0.5", "0.5"});
attributes.put("AN", new String("2"));
List<Allele> alleleList = Arrays.asList(Allele.create("TG", true), Allele.create("TA", false), Allele.create("CG", false));
Genotype genotype = new GenotypeBuilder().name("sample").attributes(attributes).alleles(alleleList).make();
final VariantContext vcExpected = new VariantContextBuilder().chr(contig).id("id1;id2").source("TC_AC").start(start).stop(start+1).alleles(alleleList).genotypes(genotype).make();
Map<String,Object> attributes = new HashMap<String,Object>(){{
put("AC", new ArrayList<Integer>(Arrays.asList(1, 1)));
put("AF", new ArrayList<Double>(Arrays.asList(0.5, 0.5)));
put("AN", 2);
}};
final Map<String, Object> extendedAttributes = new HashMap<String, Object>(){{
put("PQ", 100.0); put("HP", new String[]{"10-1", "10-2"});
}};
List<Allele> alleleListMeged = Arrays.asList(Allele.create("TA"), Allele.create("CG"));
Genotype genotype = new GenotypeBuilder().name("sample").attributes(extendedAttributes).alleles(alleleListMeged).make();
final VariantContext vcExpected = new VariantContextBuilder().chr(contig).id("id1;id2").source("TC_GA").start(start).stop(start+1).alleles(alleleList).genotypes(genotype).attributes(attributes).make();
Assert.assertTrue(genotype.sameGenotype(vcExpected.getGenotypes().get("sample")));
Assert.assertTrue(vcExpected.hasSameAllelesAs(vc));
Assert.assertEquals(vcExpected.getChr(), vc.getChr());
Assert.assertEquals(vcExpected.getStart(), vc.getStart());
Assert.assertEquals(vcExpected.getEnd(), vc.getEnd());
Assert.assertEquals(vcExpected.getID(), vc.getID());
Assert.assertEquals(vcExpected.getSource(), vc.getSource());
Assert.assertEquals(vcExpected.isFiltered(), vc.isFiltered());
Assert.assertEquals(vcExpected.getPhredScaledQual(), vc.getPhredScaledQual());
Assert.assertEquals(vcExpected.getAttribute("PQ"), vc.getAttribute("PQ"));
Assert.assertEquals(vcExpected.getAttribute("HP"), vc.getAttribute("HP"));
}
@Test
public void TestReallyMergeIntoMNP(final VariantContext vc1, final VariantContext vc2 ){
public void TestReallyMergeIntoMNP( ){
VariantContext vc = PhasingUtils.reallyMergeIntoMNP(vc1, vc2, referenceFile);
List<Allele> alleleList = Arrays.asList(Allele.create("TG", true), Allele.create("TA", false), Allele.create("CG", false));
Map<String,Object> attributes = new HashMap<String,Object>(){{
put("AC", new ArrayList<Integer>(Arrays.asList(1, 1)));
put("AF", new ArrayList<Double>(Arrays.asList(0.5, 0.5)));
put("AN", 2);
}};
final Map<String, Object> extendedAttributes = new HashMap<String, Object>(){{
put("PQ", 100.0); put("HP", new String[]{"10-1", "10-2"});
}};
List<Allele> alleleListMeged = Arrays.asList(Allele.create("TA"), Allele.create("CG"));
Genotype genotype = new GenotypeBuilder().name("sample").attributes(extendedAttributes).alleles(alleleListMeged).make();
final VariantContext vcExpected = new VariantContextBuilder().chr(contig).id("id1;id2").source("TC_GA").start(start).stop(start+1).alleles(alleleList).genotypes(genotype).attributes(attributes).make();
Assert.assertTrue(genotype.sameGenotype(vcExpected.getGenotypes().get("sample")));
Assert.assertTrue(vcExpected.hasSameAllelesAs(vc));
Assert.assertEquals(vcExpected.getChr(), vc.getChr());
Assert.assertEquals(vcExpected.getStart(), vc.getStart());
Assert.assertEquals(vcExpected.getEnd(), vc.getEnd());
Assert.assertEquals(vcExpected.getID(), vc.getID());
Assert.assertEquals(vcExpected.getSource(), vc.getSource());
Assert.assertEquals(vcExpected.isFiltered(), vc.isFiltered());
Assert.assertEquals(vcExpected.getPhredScaledQual(), vc.getPhredScaledQual());
Assert.assertEquals(vcExpected.getAttribute("PQ"), vc.getAttribute("PQ"));
Assert.assertEquals(vcExpected.getAttribute("HP"), vc.getAttribute("HP"));
}
@Test