Adding 80% support for multi-allelic variants
-- Multi-allelic variants are split into their bi-allelic version, trimmed, and we attempt to provide a meaningful genotype for NA12878 here. It's not perfect and needs some discussion on how to handle het/alt variants -- Adding splitInBiallelic funtion to VariantContextUtils as well as extensive unit tests that also indirectly test reverseTrimAlleles (which worked perfectly FYI)
This commit is contained in:
parent
e14bfa9f5c
commit
48f271c5bd
|
|
@ -979,6 +979,40 @@ public class VariantContextUtils {
|
|||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
|
||||
*
|
||||
|
|
|
|||
|
|
@ -782,7 +782,7 @@ public class VariantContextTestProvider {
|
|||
Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
|
||||
Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
|
||||
Assert.assertEquals(actual.getID(), expected.getID(), "id");
|
||||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
|
||||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);
|
||||
|
||||
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
||||
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
|
|||
import java.util.*;
|
||||
|
||||
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||
Allele Aref, T, C, Cref, ATC, ATCATC;
|
||||
Allele Aref, T, C, G, Cref, ATC, ATCATC;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeSuite
|
||||
|
|
@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
Cref = Allele.create("C", true);
|
||||
T = Allele.create("T");
|
||||
C = Allele.create("C");
|
||||
G = Allele.create("G");
|
||||
ATC = Allele.create("ATC");
|
||||
ATCATC = Allele.create("ATCATC");
|
||||
}
|
||||
|
|
@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// test splitting into bi-allelics
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "SplitBiallelics")
|
||||
public Object[][] makeSplitBiallelics() throws CloneNotSupportedException {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C));
|
||||
|
||||
// biallelic -> biallelic
|
||||
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
|
||||
|
||||
// monos -> monos
|
||||
root.alleles(Arrays.asList(Aref));
|
||||
tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
|
||||
|
||||
root.alleles(Arrays.asList(Aref, C, T));
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Aref, C)).make(),
|
||||
root.alleles(Arrays.asList(Aref, T)).make())});
|
||||
|
||||
root.alleles(Arrays.asList(Aref, C, T, G));
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Aref, C)).make(),
|
||||
root.alleles(Arrays.asList(Aref, T)).make(),
|
||||
root.alleles(Arrays.asList(Aref, G)).make())});
|
||||
|
||||
final Allele C = Allele.create("C");
|
||||
final Allele CA = Allele.create("CA");
|
||||
final Allele CAA = Allele.create("CAA");
|
||||
final Allele CAAAA = Allele.create("CAAAA");
|
||||
final Allele CAAAAA = Allele.create("CAAAAA");
|
||||
final Allele Cref = Allele.create("C", true);
|
||||
final Allele CAref = Allele.create("CA", true);
|
||||
final Allele CAAref = Allele.create("CAA", true);
|
||||
final Allele CAAAref = Allele.create("CAAA", true);
|
||||
|
||||
root.alleles(Arrays.asList(Cref, CA, CAA));
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Cref, CA)).make(),
|
||||
root.alleles(Arrays.asList(Cref, CAA)).make())});
|
||||
|
||||
root.alleles(Arrays.asList(CAAref, C, CA)).stop(12);
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(CAAref, C)).make(),
|
||||
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
|
||||
|
||||
root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13);
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(CAAAref, C)).make(),
|
||||
root.alleles(Arrays.asList(CAAref, C)).stop(12).make(),
|
||||
root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
|
||||
|
||||
root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13);
|
||||
tests.add(new Object[]{root.make(),
|
||||
Arrays.asList(
|
||||
root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(),
|
||||
root.alleles(Arrays.asList(Cref, CA)).stop(10).make(),
|
||||
root.alleles(Arrays.asList(CAref, C)).stop(11).make(),
|
||||
root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SplitBiallelics")
|
||||
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
final VariantContext expected = expectedBiallelics.get(i);
|
||||
VariantContextTestProvider.assertEquals(actual, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes")
|
||||
public void testSplitBiallelicsGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
||||
int sampleI = 0;
|
||||
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||
genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
|
||||
}
|
||||
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
||||
|
||||
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
|
||||
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
|
||||
|
||||
for ( final Genotype inputGenotype : genotypes ) {
|
||||
final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName());
|
||||
Assert.assertNotNull(actualGenotype);
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName()));
|
||||
else
|
||||
Assert.assertTrue(actualGenotype.isNoCall());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue