From b4a5acd6f443b62aded0d0e82a34aead94acedad Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 15 May 2012 16:34:29 -0400 Subject: [PATCH] Added some genotype tests for BCF2, which all pass. Of course that's because I commented out the ones that didn't --- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 2 +- .../VariantContextTestProvider.java | 166 ++++++++++++++++-- 2 files changed, 155 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 1d5f8b885..1cf5370b0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -160,7 +160,7 @@ public class BCF2Decoder { case INT8: case INT16: case INT32: return value; - case FLOAT: return (double)rawFloatToFloat(value); + case FLOAT: return rawFloatToFloat(value); case CHAR: return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased default: throw new ReviewedStingException("BCF2 codec doesn't know how to decode type " + type ); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 0ca465e5d..9499160a2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -31,13 +31,10 @@ import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.writer.Options; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.testng.Assert; -import org.testng.annotations.DataProvider; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStream; import java.util.*; /** @@ -47,6 +44,7 @@ import java.util.*; * @since Date created */ public class VariantContextTestProvider { + final private static boolean ADVANCED_TESTS = false; final static VCFHeader header; final static List TEST_DATAs = new ArrayList(); final static VariantContext ROOT; @@ -86,6 +84,16 @@ public class VariantContextTestProvider { public boolean hasGenotypes() { return vcs.get(0).hasGenotypes(); } + + public String toString() { + StringBuilder b = new StringBuilder(); + b.append("VariantContextTestData: ["); + for ( VariantContext vc : vcs ) { + b.append(vc.toString()).append(" ----- "); + } + b.append("]"); + return b.toString(); + } } private final static VariantContextBuilder builder() { @@ -143,6 +151,9 @@ public class VariantContextTestProvider { metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x")); metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x")); + + metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x")); + add(builder().attribute("FLOAT1", 1.0)); add(builder().attribute("FLOAT1", 100.0)); add(builder().attribute("FLOAT1", 1000.0)); @@ -169,7 +180,11 @@ public class VariantContextTestProvider { metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x")); metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x")); - addGenotypesData(new ArrayList(TEST_DATAs), metaData); + metaData.add(new VCFInfoHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype")); + metaData.add(new VCFInfoHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality")); + metaData.add(new VCFInfoHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); + + addGenotypesToTestData(); // prep the header metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0)); @@ -177,24 +192,151 @@ public class VariantContextTestProvider { header = new VCFHeader(metaData); } - private static void addGenotypesData(final ArrayList sites, Set metaData) { - // TODO + private static void addGenotypesToTestData() { + final ArrayList sites = new ArrayList(); + + sites.add(builder().alleles("A").make()); + sites.add(builder().alleles("A", "C", "T").make()); + sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make()); + sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make()); + + for ( VariantContext site : sites ) { + addGenotypes(site); + } + } + + private static void addGenotypeTests( final VariantContext site, Genotype ... genotypes ) { // for each sites VC, we are going to add create two root genotypes. // The first is the primary, and will be added to each new test // The second is variable. In some tests it's absent (testing 1 genotype), in others it is duplicated // 1 once, 10, 100, or 1000 times to test scaling - // Also, create a "missing" genotype (corresponding to a . sample) in the VCF for inclusion as well. - // test GT + final VariantContextBuilder builder = new VariantContextBuilder(site); - // test GQ + // add a single context + builder.genotypes(genotypes[0]); + add(builder); + + if ( genotypes.length > 1 ) { + // add all + add(builder.genotypes(Arrays.asList(genotypes))); + + // add all with the last replicated 10x, 100x, 1000x times + for ( int nCopiesOfLast : Arrays.asList(10, 100, 1000) ) { + final GenotypesContext gc = new GenotypesContext(); + final Genotype last = genotypes[genotypes.length-1]; + for ( int i = 0; i < genotypes.length - 1; i++ ) + gc.add(genotypes[i]); + for ( int i = 0; i < nCopiesOfLast; i++ ) + gc.add(new Genotype("copy" + i, last)); + add(builder.genotypes(gc)); + } + } + } + + + private static void addGenotypes( final VariantContext site) { + final GenotypesContext gc = new GenotypesContext(); + + // test ref/ref + final Allele ref = site.getReference(); + final Allele alt1 = site.getNAlleles() > 1 ? site.getAlternateAllele(0) : null; + final Genotype homRef = new Genotype("homRef", Arrays.asList(ref, ref)); + addGenotypeTests(site, homRef); + + if ( alt1 != null ) { + final Genotype het = new Genotype("het", Arrays.asList(ref, alt1)); + final Genotype homVar = new Genotype("homVar", Arrays.asList(alt1, alt1)); + addGenotypeTests(site, homRef, het); + addGenotypeTests(site, homRef, het, homVar); + + // ploidy + if ( ADVANCED_TESTS ) { + addGenotypeTests(site, + new Genotype("dip", Arrays.asList(ref, alt1)), + new Genotype("hap", Arrays.asList(ref))); + + addGenotypeTests(site, + new Genotype("dip", Arrays.asList(ref, alt1)), + new Genotype("tet", Arrays.asList(ref, alt1, alt1))); + } + } + + if ( ADVANCED_TESTS ) { + // testing PLs + addGenotypeTests(site, + new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{0, -1, -2}), + new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3})); + + addGenotypeTests(site, + new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}), + new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2, -3})); + + addGenotypeTests(site, + new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}), + new Genotype("g2", Arrays.asList(ref, ref), -1, new double[]{0, -2000, -1000})); + + addGenotypeTests(site, // missing PLs + new Genotype("g1", Arrays.asList(ref, ref), -1, new double[]{-1, 0, -2}), + new Genotype("g2", Arrays.asList(ref, ref), -1)); + } + + // test attributes + addGenotypeTests(site, + attr("g1", ref, "INT1", 1), + attr("g2", ref, "INT1", 2)); + addGenotypeTests(site, + attr("g1", ref, "INT1", 1), + attr("g2", ref, "INT1")); + addGenotypeTests(site, + attr("g1", ref, "INT3", 1, 2, 3), + attr("g2", ref, "INT3", 4, 5, 6)); + addGenotypeTests(site, + attr("g1", ref, "INT3", 1, 2, 3), + attr("g2", ref, "INT3")); + + if ( ADVANCED_TESTS ) { + addGenotypeTests(site, + attr("g1", ref, "INT.VAR", 1, 2, 3), + attr("g2", ref, "INT.VAR", 4, 5), + attr("g3", ref, "INT.VAR", 6)); + addGenotypeTests(site, + attr("g1", ref, "INT.VAR", 1, 2, 3), + attr("g2", ref, "INT.VAR"), + attr("g3", ref, "INT.VAR", 5)); + } + + addGenotypeTests(site, + attr("g1", ref, "FLOAT1", 1.0), + attr("g2", ref, "FLOAT1", 2.0)); + addGenotypeTests(site, + attr("g1", ref, "FLOAT1", 1.0), + attr("g2", ref, "FLOAT1")); + addGenotypeTests(site, + attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), + attr("g2", ref, "FLOAT3", 4.0, 5.0, 6.0)); + addGenotypeTests(site, + attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0), + attr("g2", ref, "FLOAT3")); // test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample } + private static Genotype attr(final String name, final Allele ref, final String key, final Object ... value) { + if ( value.length == 0 ) + return new Genotype(name, Arrays.asList(ref, ref), -1); + else { + final Object toAdd = value.length == 1 ? value[0] : Arrays.asList(value); + Map attr = Collections.singletonMap(key, toAdd); + return new Genotype(name, Arrays.asList(ref, ref), -1, null, attr, false); + } + } - public static VCFHeader getHeader() { - return header; + private static VCFHeader getHeader(final List vcs) { + final Set samples = new HashSet(); + for ( final VariantContext vc : vcs ) + samples.addAll(vc.getSampleNames()); + return new VCFHeader(header.getMetaData(), samples); } public static List generateSiteTests() { @@ -210,7 +352,7 @@ public class VariantContextTestProvider { // write final EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = tester.makeWriter(tmpFile, options); - writer.writeHeader(VariantContextTestProvider.getHeader()); + writer.writeHeader(VariantContextTestProvider.getHeader(data.vcs)); final List expected = data.vcs; for ( VariantContext vc : expected ) writer.add(vc);