-- Convenience constructor for VariantContextBuilder that creates a new one based on an existing builder
-- Convenience routine for creating alleles from strings of bases -- Convenience constructor for VCFFilterHeaderLine whose description is the same as its name -- VariantContextTestProvider creates all sorts of VariantContexts for testing purposes. It can be reused throughout the code for BCF, VCF, etc. -- Created basic BCF2WriterCodec tests that consume VariantContextTestProvider contexts, write them to disk with the BCF2 writer, and check that they come back equal to the original VariantContexts. This actually worked for some complex tests on the first go
This commit is contained in:
parent
4968dcd36a
commit
fb1911a1b6
|
|
@ -18,6 +18,14 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
|
|||
super("FILTER", name, description);
|
||||
}
|
||||
|
||||
/**
 * Convenience constructor for a FILTER header line whose description is
 * identical to its name.
 *
 * @param name the filter name; it is passed to the superclass as both the
 *             name and the description of the line
 */
public VCFFilterHeaderLine(final String name) {
    super("FILTER", name, name);
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
|
|
|
|||
|
|
@ -122,6 +122,27 @@ public class VariantContextBuilder {
|
|||
this.stop = parent.getEnd();
|
||||
}
|
||||
|
||||
/**
 * Creates a new builder initialized from the state of an existing builder.
 *
 * The location, source, ID, alleles, genotypes, error and reference-base fields
 * are assigned directly from {@code parent} (object references are shared, not
 * copied). Attributes and filters are routed through this builder's own
 * {@code attributes(...)} and {@code filters(...)} setters.
 *
 * @param parent the builder to copy from; must not be null
 * @throws ReviewedStingException if {@code parent} is null
 */
public VariantContextBuilder(VariantContextBuilder parent) {
    if ( parent == null ) {
        throw new ReviewedStingException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder");
    }

    // genomic location
    this.contig = parent.contig;
    this.start = parent.start;
    this.stop = parent.stop;

    // site-level data
    this.source = parent.source;
    this.ID = parent.ID;
    this.alleles = parent.alleles;
    this.genotypes = parent.genotypes;
    this.log10PError = parent.log10PError;
    this.referenceBaseForIndel = parent.referenceBaseForIndel;

    // the flag is reset before the attributes are handed to the setter, as in the original ordering
    this.attributesCanBeModified = false;
    this.attributes(parent.attributes);
    this.filters(parent.filters);
}
|
||||
|
||||
public VariantContextBuilder copy() {
|
||||
return new VariantContextBuilder(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this builder to use this collection of alleles for the resulting VariantContext
|
||||
*
|
||||
|
|
@ -135,6 +156,20 @@ public class VariantContextBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public VariantContextBuilder alleles(final String ... alleleStrings) {
|
||||
List<Allele> alleles = new ArrayList<Allele>(alleleStrings.length);
|
||||
|
||||
for ( int i = 0; i < alleleStrings.length; i++ ) {
|
||||
alleles.add(Allele.create(alleleStrings[i], i == 0));
|
||||
}
|
||||
|
||||
return alleles(alleles);
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles() {
|
||||
return new ArrayList<Allele>(alleles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this builder to use this map of attributes alleles for the resulting VariantContext
|
||||
*
|
||||
|
|
@ -317,6 +352,10 @@ public class VariantContextBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public VariantContextBuilder referenceBaseForIndel(final String referenceBaseForIndel) {
|
||||
return referenceBaseForIndel(referenceBaseForIndel.getBytes()[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells us that the resulting VariantContext should have source field set to source
|
||||
* @param source
|
||||
|
|
|
|||
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Routines for generating all sorts of VCs for testing
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class VariantContextTestProvider {
|
||||
final static VCFHeader header;
|
||||
final static List<VariantContextsTest> tests = new ArrayList<VariantContextsTest>();
|
||||
final static VariantContext ROOT;
|
||||
|
||||
public static class VariantContextsTest {
|
||||
public List<VariantContext> vcs;
|
||||
|
||||
public VariantContextsTest(final VariantContextBuilder builder) {
|
||||
this(Collections.singletonList(builder.make()));
|
||||
}
|
||||
|
||||
public VariantContextsTest(final VariantContext vc) {
|
||||
this(Collections.singletonList(vc));
|
||||
}
|
||||
|
||||
public VariantContextsTest(final List<VariantContext> vcs) {
|
||||
this.vcs = vcs;
|
||||
}
|
||||
|
||||
public boolean hasGenotypes() {
|
||||
return vcs.get(0).hasGenotypes();
|
||||
}
|
||||
}
|
||||
|
||||
private final static VariantContextBuilder builder() {
|
||||
return new VariantContextBuilder(ROOT);
|
||||
}
|
||||
|
||||
private final static void add(VariantContextBuilder builder) {
|
||||
tests.add(new VariantContextsTest(builder));
|
||||
}
|
||||
|
||||
static {
|
||||
VariantContextBuilder rootBuilder = new VariantContextBuilder();
|
||||
rootBuilder.source("test");
|
||||
rootBuilder.loc("1", 10, 10);
|
||||
rootBuilder.alleles("A", "C");
|
||||
rootBuilder.unfiltered();
|
||||
ROOT = rootBuilder.make();
|
||||
|
||||
add(builder());
|
||||
add(builder().alleles("A"));
|
||||
add(builder().alleles("A", "C", "T"));
|
||||
add(builder().alleles("-", "C").referenceBaseForIndel("A"));
|
||||
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
|
||||
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
|
||||
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
|
||||
|
||||
// make sure filters work
|
||||
add(builder().unfiltered());
|
||||
add(builder().passFilters());
|
||||
add(builder().filters("FILTER1"));
|
||||
add(builder().filters("FILTER1", "FILTER2"));
|
||||
|
||||
add(builder().log10PError(VariantContext.NO_LOG10_PERROR));
|
||||
add(builder().log10PError(-1));
|
||||
add(builder().log10PError(-1.234e6));
|
||||
|
||||
add(builder().noID());
|
||||
add(builder().id("rsID12345"));
|
||||
|
||||
// prep the header
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER1"));
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER2"));
|
||||
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
||||
|
||||
header = new VCFHeader(metaData);
|
||||
}
|
||||
|
||||
public static VCFHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public static List<VariantContextsTest> generateSiteTests() {
|
||||
return tests;
|
||||
}
|
||||
|
||||
public static void assertEquals( final VariantContext actual, final VariantContext expected ) {
|
||||
Assert.assertNotNull(actual);
|
||||
Assert.assertEquals(actual.getChr(), expected.getChr());
|
||||
Assert.assertEquals(actual.getStart(), expected.getStart());
|
||||
Assert.assertEquals(actual.getEnd(), expected.getEnd());
|
||||
// TODO -- expand me
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class BCF2WriterCodecUnitTest extends BaseTest {
|
||||
private static File tmpFile;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
|
||||
// private final static String START_VCF41_LINES = "##fileformat=VCFv4.1\n" +
|
||||
// "##reference=file://" + BaseTest.b37KGReference + "\n" +
|
||||
// "##contig=<ID=1,length=249250621,assembly=b37>\n" +
|
||||
// "##contig=<ID=2,length=243199373,assembly=b37>\n";
|
||||
//
|
||||
//// ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
|
||||
//// ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
|
||||
//// ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
|
||||
//// ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
|
||||
//// ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
|
||||
//// ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
|
||||
//// ##FILTER=<ID=q10,Description="Quality below 10">
|
||||
//// ##FILTER=<ID=s50,Description="Less than 50% of samples have data">
|
||||
//// ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
||||
//// ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
|
||||
//// ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
|
||||
//// ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
|
||||
//
|
||||
// private final static String SITES_HEADER_LINE = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT";
|
||||
|
||||
@BeforeSuite
|
||||
public void before() throws IOException {
|
||||
tmpFile = File.createTempFile("BCF2WriterCodecUnitTest", ".bcf");
|
||||
tmpFile.delete();
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
dictionary = seq.getSequenceDictionary();
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void beforeMethod() throws IOException {
|
||||
tmpFile.delete(); // cleanup the test file
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Provider of VariantContexts for testing
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "SiteVCs")
|
||||
public Object[][] SiteVCsTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( VariantContextTestProvider.VariantContextsTest test : VariantContextTestProvider.generateSiteTests() )
|
||||
tests.add(new Object[]{test.vcs});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SiteVCs")
|
||||
public void testBCF2WriterReader(final List<VariantContext> contexts) throws IOException {
|
||||
// todo -- test all options
|
||||
|
||||
// write
|
||||
final VariantContextWriter writer = VariantContextWriterFactory.create(tmpFile, dictionary);
|
||||
writer.writeHeader(VariantContextTestProvider.getHeader());
|
||||
for ( VariantContext vc : contexts )
|
||||
writer.add(vc);
|
||||
writer.close();
|
||||
|
||||
// read in the features
|
||||
BCF2Codec codec = new BCF2Codec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
Iterator<VariantContext> it = contexts.iterator();
|
||||
while ( ! pbs.isDone() ) {
|
||||
VariantContext vc = it.next();
|
||||
VariantContext bcf = codec.decode(pbs);
|
||||
VariantContextTestProvider.assertEquals(vc, bcf);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue