Added some genotype tests for BCF2, which all pass. Of course that's because I commented out the ones that didn't
This commit is contained in:
parent
373ae39e86
commit
b4a5acd6f4
|
|
@ -160,7 +160,7 @@ public class BCF2Decoder {
|
||||||
case INT8:
|
case INT8:
|
||||||
case INT16:
|
case INT16:
|
||||||
case INT32: return value;
|
case INT32: return value;
|
||||||
case FLOAT: return (double)rawFloatToFloat(value);
|
case FLOAT: return rawFloatToFloat(value);
|
||||||
case CHAR: return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased
|
case CHAR: return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased
|
||||||
default: throw new ReviewedStingException("BCF2 codec doesn't know how to decode type " + type );
|
default: throw new ReviewedStingException("BCF2 codec doesn't know how to decode type " + type );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,13 +31,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.DataProvider;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -47,6 +44,7 @@ import java.util.*;
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
public class VariantContextTestProvider {
|
public class VariantContextTestProvider {
|
||||||
|
final private static boolean ADVANCED_TESTS = false;
|
||||||
final static VCFHeader header;
|
final static VCFHeader header;
|
||||||
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
||||||
final static VariantContext ROOT;
|
final static VariantContext ROOT;
|
||||||
|
|
@ -86,6 +84,16 @@ public class VariantContextTestProvider {
|
||||||
public boolean hasGenotypes() {
|
public boolean hasGenotypes() {
|
||||||
return vcs.get(0).hasGenotypes();
|
return vcs.get(0).hasGenotypes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder b = new StringBuilder();
|
||||||
|
b.append("VariantContextTestData: [");
|
||||||
|
for ( VariantContext vc : vcs ) {
|
||||||
|
b.append(vc.toString()).append(" ----- ");
|
||||||
|
}
|
||||||
|
b.append("]");
|
||||||
|
return b.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static VariantContextBuilder builder() {
|
private final static VariantContextBuilder builder() {
|
||||||
|
|
@ -143,6 +151,9 @@ public class VariantContextTestProvider {
|
||||||
metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x"));
|
metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x"));
|
||||||
metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x"));
|
metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x"));
|
||||||
|
|
||||||
|
|
||||||
|
metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
|
||||||
|
|
||||||
add(builder().attribute("FLOAT1", 1.0));
|
add(builder().attribute("FLOAT1", 1.0));
|
||||||
add(builder().attribute("FLOAT1", 100.0));
|
add(builder().attribute("FLOAT1", 100.0));
|
||||||
add(builder().attribute("FLOAT1", 1000.0));
|
add(builder().attribute("FLOAT1", 1000.0));
|
||||||
|
|
@ -169,7 +180,11 @@ public class VariantContextTestProvider {
|
||||||
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
||||||
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
||||||
|
|
||||||
addGenotypesData(new ArrayList<VariantContextTestData>(TEST_DATAs), metaData);
|
metaData.add(new VCFInfoHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||||
|
metaData.add(new VCFInfoHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||||
|
|
||||||
|
addGenotypesToTestData();
|
||||||
|
|
||||||
// prep the header
|
// prep the header
|
||||||
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
||||||
|
|
@ -177,24 +192,151 @@ public class VariantContextTestProvider {
|
||||||
header = new VCFHeader(metaData);
|
header = new VCFHeader(metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addGenotypesData(final ArrayList<VariantContextTestData> sites, Set<VCFHeaderLine> metaData) {
|
private static void addGenotypesToTestData() {
|
||||||
// TODO
|
final ArrayList<VariantContext> sites = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
|
sites.add(builder().alleles("A").make());
|
||||||
|
sites.add(builder().alleles("A", "C", "T").make());
|
||||||
|
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
|
||||||
|
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
|
||||||
|
|
||||||
|
for ( VariantContext site : sites ) {
|
||||||
|
addGenotypes(site);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addGenotypeTests( final VariantContext site, Genotype ... genotypes ) {
|
||||||
// for each sites VC, we are going to add create two root genotypes.
|
// for each sites VC, we are going to add create two root genotypes.
|
||||||
// The first is the primary, and will be added to each new test
|
// The first is the primary, and will be added to each new test
|
||||||
// The second is variable. In some tests it's absent (testing 1 genotype), in others it is duplicated
|
// The second is variable. In some tests it's absent (testing 1 genotype), in others it is duplicated
|
||||||
// 1 once, 10, 100, or 1000 times to test scaling
|
// 1 once, 10, 100, or 1000 times to test scaling
|
||||||
// Also, create a "missing" genotype (corresponding to a . sample) in the VCF for inclusion as well.
|
|
||||||
|
|
||||||
// test GT
|
final VariantContextBuilder builder = new VariantContextBuilder(site);
|
||||||
|
|
||||||
// test GQ
|
// add a single context
|
||||||
|
builder.genotypes(genotypes[0]);
|
||||||
|
add(builder);
|
||||||
|
|
||||||
|
if ( genotypes.length > 1 ) {
|
||||||
|
// add all
|
||||||
|
add(builder.genotypes(Arrays.asList(genotypes)));
|
||||||
|
|
||||||
|
// add all with the last replicated 10x, 100x, 1000x times
|
||||||
|
for ( int nCopiesOfLast : Arrays.asList(10, 100, 1000) ) {
|
||||||
|
final GenotypesContext gc = new GenotypesContext();
|
||||||
|
final Genotype last = genotypes[genotypes.length-1];
|
||||||
|
for ( int i = 0; i < genotypes.length - 1; i++ )
|
||||||
|
gc.add(genotypes[i]);
|
||||||
|
for ( int i = 0; i < nCopiesOfLast; i++ )
|
||||||
|
gc.add(new Genotype("copy" + i, last));
|
||||||
|
add(builder.genotypes(gc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void addGenotypes( final VariantContext site) {
|
||||||
|
final GenotypesContext gc = new GenotypesContext();
|
||||||
|
|
||||||
|
// test ref/ref
|
||||||
|
final Allele ref = site.getReference();
|
||||||
|
final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null;
|
||||||
|
final Genotype homRef = new Genotype("homRef", Arrays.asList(ref, ref));
|
||||||
|
addGenotypeTests(site, homRef);
|
||||||
|
|
||||||
|
if ( alt1 != null ) {
|
||||||
|
final Genotype het = new Genotype("het", Arrays.asList(ref, alt1));
|
||||||
|
final Genotype homVar = new Genotype("homVar", Arrays.asList(alt1, alt1));
|
||||||
|
addGenotypeTests(site, homRef, het);
|
||||||
|
addGenotypeTests(site, homRef, het, homVar);
|
||||||
|
|
||||||
|
// ploidy
|
||||||
|
if ( ADVANCED_TESTS ) {
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new Genotype("dip", Arrays.asList(ref, alt1)),
|
||||||
|
new Genotype("hap", Arrays.asList(ref)));
|
||||||
|
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new Genotype("dip", Arrays.asList(ref, alt1)),
|
||||||
|
new Genotype("tet", Arrays.asList(ref, alt1, alt1)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ADVANCED_TESTS ) {
|
||||||
|
// testing PLs
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2}),
|
||||||
|
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3}));
|
||||||
|
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
|
||||||
|
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3}));
|
||||||
|
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
|
||||||
|
new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2000, -1000}));
|
||||||
|
|
||||||
|
addGenotypeTests(site, // missing PLs
|
||||||
|
new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}),
|
||||||
|
new Genotype("g2", Arrays.asList(ref, ref), -1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// test attributes
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "INT1", 1),
|
||||||
|
attr("g2", ref, "INT1", 2));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "INT1", 1),
|
||||||
|
attr("g2", ref, "INT1"));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "INT3", 1, 2, 3),
|
||||||
|
attr("g2", ref, "INT3", 4, 5, 6));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "INT3", 1, 2, 3),
|
||||||
|
attr("g2", ref, "INT3"));
|
||||||
|
|
||||||
|
if ( ADVANCED_TESTS ) {
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "INT.VAR", 1, 2, 3),
|
||||||
|
attr("g2", ref, "INT.VAR", 4, 5),
|
||||||
|
attr("g3", ref, "INT.VAR", 6));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "INT.VAR", 1, 2, 3),
|
||||||
|
attr("g2", ref, "INT.VAR"),
|
||||||
|
attr("g3", ref, "INT.VAR", 5));
|
||||||
|
}
|
||||||
|
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "FLOAT1", 1.0),
|
||||||
|
attr("g2", ref, "FLOAT1", 2.0));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "FLOAT1", 1.0),
|
||||||
|
attr("g2", ref, "FLOAT1"));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0),
|
||||||
|
attr("g2", ref, "FLOAT3", 4.0, 5.0, 6.0));
|
||||||
|
addGenotypeTests(site,
|
||||||
|
attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0),
|
||||||
|
attr("g2", ref, "FLOAT3"));
|
||||||
|
|
||||||
// test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
|
// test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) {
|
||||||
|
if ( value.length == 0 )
|
||||||
|
return new Genotype(name, Arrays.asList(ref, ref), -1);
|
||||||
|
else {
|
||||||
|
final Object toAdd = value.length == 1 ? value[0] : Arrays.asList(value);
|
||||||
|
Map<String, Object> attr = Collections.singletonMap(key, toAdd);
|
||||||
|
return new Genotype(name, Arrays.asList(ref, ref), -1, null, attr, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static VCFHeader getHeader() {
|
private static VCFHeader getHeader(final List<VariantContext> vcs) {
|
||||||
return header;
|
final Set<String> samples = new HashSet<String>();
|
||||||
|
for ( final VariantContext vc : vcs )
|
||||||
|
samples.addAll(vc.getSampleNames());
|
||||||
|
return new VCFHeader(header.getMetaData(), samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<VariantContextTestData> generateSiteTests() {
|
public static List<VariantContextTestData> generateSiteTests() {
|
||||||
|
|
@ -210,7 +352,7 @@ public class VariantContextTestProvider {
|
||||||
// write
|
// write
|
||||||
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||||
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
||||||
writer.writeHeader(VariantContextTestProvider.getHeader());
|
writer.writeHeader(VariantContextTestProvider.getHeader(data.vcs));
|
||||||
final List<VariantContext> expected = data.vcs;
|
final List<VariantContext> expected = data.vcs;
|
||||||
for ( VariantContext vc : expected )
|
for ( VariantContext vc : expected )
|
||||||
writer.add(vc);
|
writer.add(vc);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue