gatk-3.8/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java

471 lines
18 KiB
Java
Executable File

// our package
package org.broadinstitute.sting.oneoffprojects.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.BeforeClass;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.Collection;
import java.io.FileNotFoundException;
import java.io.File;
import net.sf.picard.reference.ReferenceSequenceFile;
/**
 * Basic unit tests for {@link VariantContext}.
 *
 * <p>Covers construction of SNP, reference-only, deletion and insertion contexts,
 * rejection of invalid allele lists, genotype access and chromosome counting,
 * filter manipulation, and building sub-contexts from subsets of genotypes.
 *
 * <p>All {@code assertEquals} calls use JUnit's {@code (expected, actual)} argument
 * order so that failure messages report the two values the right way round.
 */
public class VariantContextTest extends BaseTest {
    private static ReferenceSequenceFile seq;

    /**
     * Opens the indexed hg18 reference FASTA and hands it to
     * {@link GenomeLocParser#setupRefContigOrdering} once for the whole class.
     *
     * <p>NOTE(review): {@code seqLocation} is not declared in this class; presumably it is
     * inherited from {@code BaseTest} -- confirm.
     *
     * @throws FileNotFoundException declared by the reference-file constructor call
     */
    @BeforeClass
    public static void init() throws FileNotFoundException {
        // sequence
        seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
        GenomeLocParser.setupRefContigOrdering(seq);
    }

    // Alleles are re-created before every test in before().
    Allele A, Aref, T, Tref;
    Allele del, delRef, ATC, ATCref;

    // The locations below are instance-field initializers, so they are evaluated when JUnit
    // constructs the test instance, which happens after the @BeforeClass init() has run.

    // A [ref] / T at 10
    GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);

    // - / ATC [ref] from 20-22
    GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);

    // - [ref] / ATC from 20-20
    GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);

    // - / A / T / ATC [ref] from 20-22
    GenomeLoc mixedLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);

    /**
     * Builds a fresh set of alleles before each test: a non-reference and a reference
     * version of each of "-", "A", "T" and "ATC".
     */
    @Before
    public void before() {
        del = new Allele("-");
        delRef = new Allele("-", true);

        A = new Allele("A");
        Aref = new Allele("A", true);
        T = new Allele("T");
        Tref = new Allele("T", true);

        ATC = new Allele("ATC");
        ATCref = new Allele("ATC", true);
    }

    /**
     * A context built from A [ref] / T at {@code snpLoc} must report itself as a biallelic
     * SNP that is a transversion, expose the expected alleles, and carry no genotypes.
     */
    @Test
    public void testCreatingSNPVariantContext() {
        logger.warn("testCreatingSNPVariantContext");

        List<Allele> alleles = Arrays.asList(Aref, T);
        VariantContext vc = new VariantContext(snpLoc, alleles);
        logger.warn("vc = " + vc);

        Assert.assertEquals(snpLoc, vc.getLocation());
        Assert.assertEquals(VariantContext.Type.SNP, vc.getType());
        Assert.assertTrue(vc.isSNP());
        Assert.assertFalse(vc.isIndel());
        Assert.assertFalse(vc.isInsertion());
        Assert.assertFalse(vc.isDeletion());
        Assert.assertFalse(vc.isMixed());
        Assert.assertTrue(vc.isBiallelic());
        Assert.assertEquals(2, vc.getNAlleles());
        Assert.assertTrue(vc.isTransversion());
        Assert.assertFalse(vc.isTransition());

        Assert.assertEquals(Aref, vc.getReference());
        Assert.assertEquals(2, vc.getAlleles().size());
        Assert.assertEquals(1, vc.getAlternateAlleles().size());
        Assert.assertEquals(T, vc.getAlternateAllele(0));

        Assert.assertFalse(vc.hasGenotypes());
        Assert.assertEquals(0, vc.getSampleNames().size());
    }

    /**
     * A context built from the single reference allele A must have type NO_VARIATION,
     * be neither SNP nor indel nor biallelic, and have no alternate alleles or genotypes.
     */
    @Test
    public void testCreatingRefVariantContext() {
        logger.warn("testCreatingRefVariantContext");

        List<Allele> alleles = Arrays.asList(Aref);
        VariantContext vc = new VariantContext(snpLoc, alleles);
        logger.warn("vc = " + vc);

        Assert.assertEquals(snpLoc, vc.getLocation());
        Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType());
        Assert.assertFalse(vc.isSNP());
        Assert.assertFalse(vc.isIndel());
        Assert.assertFalse(vc.isInsertion());
        Assert.assertFalse(vc.isDeletion());
        Assert.assertFalse(vc.isMixed());
        Assert.assertFalse(vc.isBiallelic());
        Assert.assertEquals(1, vc.getNAlleles());

        Assert.assertEquals(Aref, vc.getReference());
        Assert.assertEquals(1, vc.getAlleles().size());
        Assert.assertEquals(0, vc.getAlternateAlleles().size());
        //Assert.assertEquals(T, vc.getAlternateAllele(0));

        Assert.assertFalse(vc.hasGenotypes());
        Assert.assertEquals(0, vc.getSampleNames().size());
    }

    /**
     * A context built from ATC [ref] / "-" at {@code delLoc} must report itself as a
     * biallelic indel that is a deletion (and not an insertion), with no genotypes.
     */
    @Test
    public void testCreatingDeletionVariantContext() {
        logger.warn("testCreatingDeletionVariantContext");

        List<Allele> alleles = Arrays.asList(ATCref, del);
        VariantContext vc = new VariantContext(delLoc, alleles);
        logger.warn("vc = " + vc);

        Assert.assertEquals(delLoc, vc.getLocation());
        Assert.assertEquals(VariantContext.Type.INDEL, vc.getType());
        Assert.assertFalse(vc.isSNP());
        Assert.assertTrue(vc.isIndel());
        Assert.assertFalse(vc.isInsertion());
        Assert.assertTrue(vc.isDeletion());
        Assert.assertFalse(vc.isMixed());
        Assert.assertTrue(vc.isBiallelic());
        Assert.assertEquals(2, vc.getNAlleles());

        Assert.assertEquals(ATCref, vc.getReference());
        Assert.assertEquals(2, vc.getAlleles().size());
        Assert.assertEquals(1, vc.getAlternateAlleles().size());
        Assert.assertEquals(del, vc.getAlternateAllele(0));

        Assert.assertFalse(vc.hasGenotypes());
        Assert.assertEquals(0, vc.getSampleNames().size());
    }

    /**
     * A context built from "-" [ref] / ATC at {@code insLoc} must report itself as a
     * biallelic indel that is an insertion (and not a deletion), with no genotypes.
     */
    @Test
    public void testCreatingInsertionVariantContext() {
        logger.warn("testCreatingInsertionVariantContext");

        List<Allele> alleles = Arrays.asList(delRef, ATC);
        VariantContext vc = new VariantContext(insLoc, alleles);
        logger.warn("vc = " + vc);

        Assert.assertEquals(insLoc, vc.getLocation());
        Assert.assertEquals(VariantContext.Type.INDEL, vc.getType());
        Assert.assertFalse(vc.isSNP());
        Assert.assertTrue(vc.isIndel());
        Assert.assertTrue(vc.isInsertion());
        Assert.assertFalse(vc.isDeletion());
        Assert.assertFalse(vc.isMixed());
        Assert.assertTrue(vc.isBiallelic());
        Assert.assertEquals(2, vc.getNAlleles());

        Assert.assertEquals(delRef, vc.getReference());
        Assert.assertEquals(2, vc.getAlleles().size());
        Assert.assertEquals(1, vc.getAlternateAlleles().size());
        Assert.assertEquals(ATC, vc.getAlternateAllele(0));

        Assert.assertFalse(vc.hasGenotypes());
        Assert.assertEquals(0, vc.getSampleNames().size());
    }

    /** Two reference alleles ("-" [ref] and ATC [ref]) must be rejected by the constructor. */
    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs1() {
        logger.warn("testBadConstructorArgs1");
        new VariantContext(insLoc, Arrays.asList(delRef, ATCref));
    }

    /** "-" supplied both as reference and as non-reference must be rejected by the constructor. */
    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs2() {
        logger.warn("testBadConstructorArgs2");
        new VariantContext(insLoc, Arrays.asList(delRef, del));
    }

    /** An allele list holding only the non-reference "-" must be rejected by the constructor. */
    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs3() {
        logger.warn("testBadConstructorArgs3");
        new VariantContext(insLoc, Arrays.asList(del));
    }

    /** The same alternate allele (T) listed twice must be rejected by the constructor. */
    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgsDuplicateAlleles1() {
        logger.warn("testBadConstructorArgsDuplicateAlleles1");
        new VariantContext(insLoc, Arrays.asList(Aref, T, T));
    }

    /** A listed as both reference and non-reference must be rejected by the constructor. */
    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgsDuplicateAlleles2() {
        logger.warn("testBadConstructorArgsDuplicateAlleles2");
        new VariantContext(insLoc, Arrays.asList(Aref, A));
    }

    /**
     * Constructing A [ref] / T / "-" at the three-base {@code delLoc} is expected to fail
     * with IllegalStateException.
     */
    @Test (expected = IllegalStateException.class)
    public void testBadLoc1() {
        logger.warn("testBadLoc1");
        List<Allele> alleles = Arrays.asList(Aref, T, del);
        // Only the construction matters here; the resulting object is never used.
        new VariantContext(delLoc, alleles);
    }

    /**
     * Asking for transition status on an A [ref] / ATC context is expected to fail
     * with IllegalStateException.
     */
    @Test (expected = IllegalStateException.class)
    public void testBadTiTvRequest() {
        logger.warn("testBadTiTvRequest");
        new VariantContext(insLoc, Arrays.asList(Aref, ATC)).isTransition();
    }

    /**
     * Adds AA, AT and TT genotypes to an A [ref] / T context and checks lookup by sample
     * name (case-sensitive), polymorphism flags, and per-allele chromosome counts.
     */
    @Test
    public void testAccessingSimpleSNPGenotypes() {
        logger.warn("testAccessingSimpleSNPGenotypes");

        List<Allele> alleles = Arrays.asList(Aref, T);
        VariantContext vc = new VariantContext(snpLoc, alleles);
        logger.warn("vc = " + vc);

        Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA", 10);
        Genotype g2 = new Genotype(Arrays.asList(Aref, T), "AT", 10);
        Genotype g3 = new Genotype(Arrays.asList(T, T), "TT", 10);

        vc.addGenotypes(Arrays.asList(g1, g2, g3));

        Assert.assertTrue(vc.hasGenotypes());
        Assert.assertFalse(vc.isMonomorphic());
        Assert.assertTrue(vc.isPolymorphic());
        Assert.assertEquals(3, vc.getSampleNames().size());

        Assert.assertEquals(3, vc.getGenotypes().size());
        Assert.assertEquals(g1, vc.getGenotypes().get("AA"));
        Assert.assertEquals(g1, vc.getGenotype("AA"));
        Assert.assertEquals(g2, vc.getGenotypes().get("AT"));
        Assert.assertEquals(g2, vc.getGenotype("AT"));
        Assert.assertEquals(g3, vc.getGenotypes().get("TT"));
        Assert.assertEquals(g3, vc.getGenotype("TT"));

        Assert.assertTrue(vc.hasGenotype("AA"));
        Assert.assertTrue(vc.hasGenotype("AT"));
        Assert.assertTrue(vc.hasGenotype("TT"));
        Assert.assertFalse(vc.hasGenotype("foo"));
        Assert.assertFalse(vc.hasGenotype("TTT"));
        Assert.assertFalse(vc.hasGenotype("at"));
        Assert.assertFalse(vc.hasGenotype("tt"));

        Assert.assertEquals(6, vc.getChromosomeCount());
        Assert.assertEquals(3, vc.getChromosomeCount(Aref));
        Assert.assertEquals(3, vc.getChromosomeCount(T));
    }

    /**
     * Adds six genotypes (AA, AT, TT, T/del, del/del and a double no-call) to an
     * A [ref] / T / "-" context and checks sub-selection by sample name and the
     * chromosome counts. The expected total of 10 excludes the two no-call chromosomes.
     */
    @Test
    public void testAccessingCompleteGenotypes() {
        logger.warn("testAccessingCompleteGenotypes");

        List<Allele> alleles = Arrays.asList(Aref, T, del);
        VariantContext vc = new VariantContext(snpLoc, alleles);
        logger.warn("vc = " + vc);

        Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA", 10);
        Genotype g2 = new Genotype(Arrays.asList(Aref, T), "AT", 10);
        Genotype g3 = new Genotype(Arrays.asList(T, T), "TT", 10);
        Genotype g4 = new Genotype(Arrays.asList(T, del), "Td", 10);
        Genotype g5 = new Genotype(Arrays.asList(del, del), "dd", 10);
        Genotype g6 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "..", 10);

        vc.addGenotypes(Arrays.asList(g1, g2, g3, g4, g5, g6));

        Assert.assertTrue(vc.hasGenotypes());
        Assert.assertFalse(vc.isMonomorphic());
        Assert.assertTrue(vc.isPolymorphic());
        Assert.assertEquals(6, vc.getGenotypes().size());

        Assert.assertEquals(3, vc.getGenotypes(Arrays.asList("AA", "Td", "dd")).size());

        Assert.assertEquals(10, vc.getChromosomeCount());
        Assert.assertEquals(3, vc.getChromosomeCount(Aref));
        Assert.assertEquals(4, vc.getChromosomeCount(T));
        Assert.assertEquals(3, vc.getChromosomeCount(del));
        Assert.assertEquals(2, vc.getChromosomeCount(Allele.NO_CALL));
    }

    /**
     * For three different allele lists, adds only hom-ref and no-call genotypes and
     * checks that the context is monomorphic with 4 called chromosomes, all reference.
     */
    @Test
    public void testAccessingRefGenotypes() {
        logger.warn("testAccessingRefGenotypes");

        List<Allele> alleles1 = Arrays.asList(Aref, T);
        List<Allele> alleles2 = Arrays.asList(Aref);
        List<Allele> alleles3 = Arrays.asList(Aref, T, del);
        for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
            VariantContext vc = new VariantContext(snpLoc, alleles);
            logger.warn("vc = " + vc);

            Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA1", 10);
            Genotype g2 = new Genotype(Arrays.asList(Aref, Aref), "AA2", 10);
            Genotype g3 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "..", 10);

            vc.addGenotypes(Arrays.asList(g1, g2, g3));

            Assert.assertTrue(vc.hasGenotypes());
            Assert.assertTrue(vc.isMonomorphic());
            Assert.assertFalse(vc.isPolymorphic());
            Assert.assertEquals(3, vc.getGenotypes().size());

            Assert.assertEquals(4, vc.getChromosomeCount());
            Assert.assertEquals(4, vc.getChromosomeCount(Aref));
            Assert.assertEquals(0, vc.getChromosomeCount(T));
            Assert.assertEquals(2, vc.getChromosomeCount(Allele.NO_CALL));
        }
    }

    /**
     * Walks a context through its filter states: unfiltered, one filter added,
     * two more added (three in total), then cleared back to unfiltered.
     */
    @Test
    public void testFilters() {
        logger.warn("testFilters");

        List<Allele> alleles = Arrays.asList(Aref, T, del);
        Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA", 10);
        Genotype g2 = new Genotype(Arrays.asList(Aref, T), "AT", 10);
        VariantContext vc = new VariantContext(snpLoc, alleles, Arrays.asList(g1,g2));
        logger.warn("vc = " + vc);

        Assert.assertTrue(vc.isNotFiltered());
        Assert.assertFalse(vc.isFiltered());
        Assert.assertEquals(0, vc.getFilters().size());

        vc.addFilter("BAD_SNP_BAD!");

        Assert.assertFalse(vc.isNotFiltered());
        Assert.assertTrue(vc.isFiltered());
        Assert.assertEquals(1, vc.getFilters().size());

        vc.addFilters(Arrays.asList("REALLY_BAD_SNP", "CHRIST_THIS_IS_TERRIBLE"));

        Assert.assertFalse(vc.isNotFiltered());
        Assert.assertTrue(vc.isFiltered());
        Assert.assertEquals(3, vc.getFilters().size());

        vc.clearFilters();

        Assert.assertTrue(vc.isNotFiltered());
        Assert.assertFalse(vc.isFiltered());
        Assert.assertEquals(0, vc.getFilters().size());
    }

    /**
     * Builds sub-contexts from subsets of the genotypes of an A [ref] / T / "-" context
     * and checks each sub-context's polymorphism, variant type, biallelic status and
     * reference-allele chromosome count.
     */
    @Test
    public void testVCromGenotypes() {
        logger.warn("testVCromGenotypes");

        List<Allele> alleles = Arrays.asList(Aref, T, del);
        Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA", 10);
        Genotype g2 = new Genotype(Arrays.asList(Aref, T), "AT", 10);
        Genotype g3 = new Genotype(Arrays.asList(T, T), "TT", 10);
        Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "..", 10);
        Genotype g5 = new Genotype(Arrays.asList(del, del), "--", 10);
        VariantContext vc = new VariantContext(snpLoc, alleles, Arrays.asList(g1,g2,g3,g4,g5));
        logger.warn("vc = " + vc);

        // The numeric suffix of each sub-context names the genotypes it was built from.
        VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
        VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
        VariantContext vc23 = vc.subContextFromGenotypes(Arrays.asList(g2, g3));
        VariantContext vc4 = vc.subContextFromGenotypes(Arrays.asList(g4));
        VariantContext vc14 = vc.subContextFromGenotypes(Arrays.asList(g1, g4));
        VariantContext vc5 = vc.subContextFromGenotypes(Arrays.asList(g5));

        Assert.assertTrue(vc12.isPolymorphic());
        Assert.assertTrue(vc23.isPolymorphic());
        Assert.assertTrue(vc1.isMonomorphic());
        Assert.assertTrue(vc4.isMonomorphic());
        Assert.assertTrue(vc14.isMonomorphic());
        Assert.assertTrue(vc5.isPolymorphic());

        Assert.assertTrue(vc12.isSNP());
        Assert.assertTrue(vc12.isVariant());
        Assert.assertTrue(vc12.isBiallelic());

        Assert.assertFalse(vc1.isSNP());
        Assert.assertFalse(vc1.isVariant());
        Assert.assertFalse(vc1.isBiallelic());

        Assert.assertTrue(vc23.isSNP());
        Assert.assertTrue(vc23.isVariant());
        Assert.assertTrue(vc23.isBiallelic());

        Assert.assertFalse(vc4.isSNP());
        Assert.assertFalse(vc4.isVariant());
        Assert.assertFalse(vc4.isBiallelic());

        Assert.assertFalse(vc14.isSNP());
        Assert.assertFalse(vc14.isVariant());
        Assert.assertFalse(vc14.isBiallelic());

        Assert.assertTrue(vc5.isIndel());
        Assert.assertTrue(vc5.isDeletion());
        Assert.assertTrue(vc5.isVariant());
        Assert.assertTrue(vc5.isBiallelic());

        Assert.assertEquals(3, vc12.getChromosomeCount(Aref));
        Assert.assertEquals(1, vc23.getChromosomeCount(Aref));
        Assert.assertEquals(2, vc1.getChromosomeCount(Aref));
        Assert.assertEquals(0, vc4.getChromosomeCount(Aref));
        Assert.assertEquals(2, vc14.getChromosomeCount(Aref));
        Assert.assertEquals(0, vc5.getChromosomeCount(Aref));
    }

    /** Placeholder: currently only logs its name and asserts nothing. */
    @Test
    public void testManipulatingAlleles() {
        logger.warn("testManipulatingAlleles");
        // todo -- add tests that call add/set/remove
    }

    /** Placeholder: currently only logs its name and asserts nothing. */
    @Test
    public void testManipulatingGenotypes() {
        logger.warn("testManipulatingGenotypes");
        // todo -- add tests that call add/set/remove
    }
}
// genotype functions
// public boolean hasGenotypes() { return genotypes.size() > 0; }
// public Map<String, Genotype> getGenotypes() { return genotypes; }
// public Set<String> getSampleNames() {
// public int getChromosomeCount() {
// public int getChromosomeCount(Allele a) {
// public boolean isMonomorphic() {
// public boolean isPolymorphic() {
// public Genotype getGenotype(String sample) {
// public boolean hasGenotype(String sample) {
// public void setGenotypes(Genotype genotype) {
// public void setGenotypes(Collection<Genotype> genotypes) {
// public void setGenotypes(Map<String, Genotype> genotypes) {
// public void addGenotype(Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
// public void removeGenotype(String sampleName) {
// public void removeGenotype(Genotype genotype) {
// all functions
// public Type getType() {
// public boolean isSNP() { return getType() == Type.SNP; }
// public boolean isVariant() { return getType() != Type.NO_VARIATION; }
// public boolean isIndel() { return getType() == Type.INDEL; }
// public boolean isMixed() { return getType() == Type.MIXED; }
// public GenomeLoc getLocation() { return loc; }
// public Allele getReference() {
// public boolean isBiallelic() {
// public boolean isMonomorphic() {
// public boolean isPolymorphic() {
// public int getNAlleles() {
// public Set<Allele> getAlleles() { return alleles; }
// public Set<Allele> getAlternateAlleles() {
// public Allele getAlternateAllele(int i) {
// public void setAlleles(Set<Allele> alleles) {
// public void addAllele(Allele allele) {
// public void addAllele(Allele allele, boolean allowDuplicates) {
// public boolean validate() {