package org.broadinstitute.sting.gatk.refdata; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.junit.Assert; import static org.junit.Assert.fail; import org.junit.BeforeClass; import org.junit.Test; import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; /** * @author aaron *

* Class RodVCFTest *

* test out the rod VCF */ public class RodVCFTest extends BaseTest { private static IndexedFastaSequenceFile seq; private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf"); private VCFHeader mHeader; @BeforeClass public static void beforeTests() { try { seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta")); } catch (FileNotFoundException e) { throw new StingException("unable to load the sequence dictionary"); } GenomeLocParser.setupRefContigOrdering(seq); } private RodVCF getVCFObject() { RodVCF vcf = new RodVCF("VCF"); mHeader = null; try { mHeader = (VCFHeader) vcf.initialize(vcfFile); } catch (FileNotFoundException e) { fail("Unable to open VCF file"); } mHeader.checkVCFVersion(); return vcf; } @Test public void testInitialize() { getVCFObject(); } @Test public void testToIterator() { RodVCF vcf = getVCFObject(); VCFReader reader = new VCFReader(vcfFile); Iterator iter = vcf.createIterator("VCF", vcfFile); while (iter.hasNext()) { VCFRecord rec1 = reader.next(); VCFRecord rec2 = iter.next().mCurrentRecord; if (!rec1.equals(rec2)) { fail("VCF record rec1 != rec2"); } } } @Test public void testToMAF() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.786, 0.00001); rec = iter.next(); rec = iter.next(); rec = iter.next(); Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.0, 0.00001); } @Test public void testToString() { // slightly altered line, due to map ordering final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:\n"; RodVCF vcf = getVCFObject(); VCFReader reader = new VCFReader(vcfFile); Iterator iter = vcf.createIterator("VCF", vcfFile); boolean first = true; while (iter.hasNext()) { VCFRecord rec1 = reader.next(); VCFRecord rec2 = iter.next().mCurrentRecord; if (!rec1.toStringEncoding(mHeader).equals(rec2.toStringEncoding(mHeader))) { fail("VCF record rec1.toStringEncoding() != rec2.toStringEncoding()"); } // verify the first line too if (first) { if (!firstLine.equals(rec1.toStringEncoding(mHeader) + "\n")) { fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + "\n" + firstLine); } first = false; } } } @Test public void testInsertion() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); Assert.assertTrue(rec.isSNP()); rec = iter.next(); rec = iter.next(); rec = iter.next(); rec = iter.next(); Assert.assertTrue(rec.isIndel()); Assert.assertTrue(rec.isDeletion()); } @Test public void testDeletion() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); Assert.assertTrue(rec.isSNP()); rec = iter.next(); rec = iter.next(); rec = iter.next(); rec = iter.next(); rec = iter.next(); Assert.assertTrue(rec.isIndel()); Assert.assertTrue(rec.isInsertion()); } @Test public void testGetGenotypes() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); // we should get back a ref 'G' and an alt 'A' List list = rec.getGenotypes(); List knowngenotypes = new ArrayList(); knowngenotypes.add("GG"); knowngenotypes.add("AG"); knowngenotypes.add("AA"); Assert.assertEquals(3, list.size()); for (Genotype g: list) { Assert.assertTrue(knowngenotypes.contains(g.getBases())); } } @Test public void testGetGenotypesQual() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); // we should get back a ref 'G' and an alt 'A' List list = rec.getGenotypes(); Assert.assertEquals(4.8,list.get(0).getNegLog10PError(),0.0001); Assert.assertEquals(4.8,list.get(1).getNegLog10PError(),0.0001); Assert.assertEquals(4.3,list.get(2).getNegLog10PError(),0.0001); } @Test public void testHasGenotypes() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); Assert.assertTrue(rec.hasGenotype(DiploidGenotype.valueOf("GG"))); Assert.assertTrue(rec.hasGenotype(DiploidGenotype.valueOf("AG"))); Assert.assertTrue(rec.hasGenotype(DiploidGenotype.valueOf("AA"))); Assert.assertTrue(!rec.hasGenotype(DiploidGenotype.valueOf("TT"))); } @Test public void testGetLocation() { RodVCF vcf = getVCFObject(); Iterator iter = vcf.createIterator("VCF", vcfFile); RodVCF rec = iter.next(); GenomeLoc loc = rec.getLocation(); Assert.assertEquals(14370,loc.getStart()); Assert.assertEquals(14370,loc.getStop()); Assert.assertTrue(loc.getContig().equals("20")); } }