Initial version of clean, fast formatting routines built dynamically from a VCF header

-- BCF2FieldEncoder and the BCF2FieldWriter classes separate the task of formatting values (atomic or vector; ints, strings, floats, etc.) from the task of writing them out at the sites or genotypes level.
-- Allows us to create efficient encoders for specific combinations of header fields, such as int[] encoded values with exactly 3 values
-- Currently only used for INFO fields, but subsequent commit will include optimized genotype field encoder
-- Allowed us to naturally support encoding of lists of strings
-- Bugfixes in VariantContextUtils introduced in genotype -> genotypebuilder conversion
-- Fixes for integration test failures
-- Enabling contig updates
-- WalkerTest now prints out relative paths where possible to make cut/paste/run easier
This commit is contained in:
Mark DePristo 2012-06-10 10:53:51 -04:00
parent 51a3b6e25e
commit 2a86b81a3f
12 changed files with 546 additions and 56 deletions

View File

@ -51,7 +51,7 @@ import java.util.List;
* @version 0.1
*/
public class VariantContextWriterStub implements Stub<VariantContextWriter>, VariantContextWriter {
public final static boolean UPDATE_CONTIG_HEADERS = false;
public final static boolean UPDATE_CONTIG_HEADERS = true;
/**
* The engine, central to the GATK's processing.

View File

@ -326,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
else {
originalAttributes.put("OG",".");
}
Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
Genotype imputedGenotype = new GenotypeBuilder(g).alleles(alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
beagleVarCounts++;
}

View File

@ -730,7 +730,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
//Set genotype to no call if it falls in the fraction.
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).GQ(-1).make());
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
}
else{
genotypes.add(genotype);

View File

@ -346,6 +346,16 @@ public final class GenotypeBuilder {
return this;
}
/**
 * Drops every extended attribute held by this builder by resetting the
 * builder's extended-attribute store to {@code null}.
 *
 * @return this builder, so that calls can be chained
 */
public GenotypeBuilder noAttributes() {
    extendedAttributes = null;
    return this;
}
/**
* This genotype has this attribute key / value pair.
*

View File

@ -462,9 +462,10 @@ public class VariantContextUtils {
// Genotypes
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
// TODO -- fixme
//Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
//genotypes.add(new GenotypeBuilder(g).attributes(genotypeAttributes).make());
final GenotypeBuilder gb = new GenotypeBuilder(g);
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
gb.noAD().noDP().noPL().noAttributes();
genotypes.add(gb.make());
}
return builder.genotypes(genotypes).attributes(attributes);

View File

@ -0,0 +1,233 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext.writer;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
 * Encodes the value(s) of a single VCF header field into a BCF2 stream.
 *
 * <p>Each encoder is bound, at construction time, to one compound header line
 * and to the offset of that field's ID in the BCF2 string dictionary. An
 * encoder is either <em>fixed typed</em> (a non-null {@link BCF2Type} was
 * supplied to the constructor) or <em>dynamically typed</em> (the type is
 * derived from each value through {@link #getDynamicType(Object)}).</p>
 *
 * <p>The nested static classes provide the concrete encoders for strings and
 * characters, flags, floats and the various integer representations.</p>
 */
public abstract class BCF2FieldEncoder {
    final VCFCompoundHeaderLine headerLine;
    final BCF2Type fixedType;
    final int dictionaryOffset;
    final BCF2Type dictionaryOffsetType;

    /**
     * Creates an encoder for the field described by {@code headerLine}.
     *
     * @param headerLine the header line describing the field to encode
     * @param encoder    the BCF2 encoder; not used by this constructor
     * @param dict       map from header string to its BCF2 dictionary offset
     * @param fixedType  the BCF2 type of every value of this field, or null if
     *                   the type must be determined per value
     * @throws ReviewedStingException if the field ID is not a key of {@code dict}
     */
    public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict, final BCF2Type fixedType) {
        this.headerLine = headerLine;
        this.fixedType = fixedType;

        final Integer offset = dict.get(getField());
        if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF");
        this.dictionaryOffset = offset;
        dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
    }

    /** @return the count type declared by the header line */
    public VCFHeaderLineCount getCountType() {
        return headerLine.getCountType();
    }

    /** @return the header line this encoder was built from */
    public VCFCompoundHeaderLine getHeaderLine() {
        return headerLine;
    }

    /** @return true if the header declares an INTEGER count type */
    public boolean hasFixedCount() { return getCountType() == VCFHeaderLineCount.INTEGER; }

    /** @return true if the header declares an UNBOUNDED count type */
    public boolean hasUnboundedCount() { return getCountType() == VCFHeaderLineCount.UNBOUNDED; }

    /** @return true if the count is neither fixed nor unbounded */
    public boolean hasContextDeterminedCount() { return ! hasFixedCount() && ! hasUnboundedCount(); }

    /** @return the count stored in the header line; only meaningful when {@link #hasFixedCount()} */
    @Requires("hasFixedCount()")
    public int getFixedCount() { return headerLine.getCount(); }

    /**
     * @param vc the variant context whose allele count determines the result
     * @return the header line's count for {@code vc.getNAlleles() - 1} alleles
     */
    public int getContextDeterminedCount(final VariantContext vc) {
        return headerLine.getCount(vc.getNAlleles() - 1);
    }

    /**
     * Computes the number of BCF2 elements that will be declared for {@code value}.
     *
     * <p>Fixed counts come from the header, context-determined counts from
     * {@code vc}, and unbounded counts from the value itself (see
     * {@link #numElements(Object)}).</p>
     *
     * @param vc    the variant context being written
     * @param value the raw field value; may be null
     * @return the number of elements to declare for this field
     */
    public int getBCFFieldCount(final VariantContext vc, final Object value) {
        if ( hasFixedCount() )
            return getFixedCount();
        else if ( hasUnboundedCount() )
            return numElements(value);
        else
            return getContextDeterminedCount(vc);
    }

    /**
     * Counts the elements held by a raw field value.
     *
     * <p>null counts as 0 (matching {@link #toList(Class, Object)}, which maps
     * null to the empty list), a List counts as its size, an array (for
     * example the {@code int[]} handled by {@link IntArray}) counts as its
     * length, and anything else is a single atomic value.</p>
     */
    private static int numElements(final Object value) {
        if ( value == null ) return 0;
        if ( value instanceof List ) return ((List<?>) value).size();
        if ( value.getClass().isArray() ) return java.lang.reflect.Array.getLength(value);
        return 1;
    }

    /** @return the ID of the header line, i.e. the name of the encoded field */
    public String getField() { return headerLine.getID(); }

    /** @return the offset of the field ID in the dictionary given at construction */
    public int getDictionaryOffset() { return dictionaryOffset; }

    /** @return the integer type determined for the dictionary offset */
    public BCF2Type getDictionaryOffsetType() { return dictionaryOffsetType; }

    public boolean isFixedTyped() { return ! isDynamicallyTyped(); }
    public boolean isDynamicallyTyped() { return fixedType == null; }

    /**
     * @param value the raw field value; may be null
     * @return the fixed type for fixed typed encoders, otherwise the type
     *         derived from {@code value}
     */
    public BCF2Type getType(final Object value) { return isDynamicallyTyped() ? getDynamicType(value) : getFixedType(); }

    /**
     * @return the fixed type of this encoder
     * @throws ReviewedStingException if this encoder is dynamically typed
     */
    public BCF2Type getFixedType() {
        if ( fixedType != null )
            return fixedType;
        else
            throw new ReviewedStingException("Not a fixed type encoder: " + getField());
    }

    /**
     * Determines the BCF2 type needed for {@code value}. Dynamically typed
     * subclasses must override this; the base implementation always throws.
     */
    public BCF2Type getDynamicType(final Object value) { throw new ReviewedStingException("Function getDynamicType() not implemented"); }

    @Override
    public String toString() {
        return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
    }

    /**
     * Writes the raw value(s) of {@code value} to {@code encoder}.
     *
     * @param encoder the destination encoder
     * @param value   the raw field value
     * @param type    the BCF2 type to encode with, as returned by {@link #getType(Object)}
     * @throws IOException if the encoder fails to write
     */
    public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException;

    /**
     * Helper function that takes an object and returns a list representation
     * of it:
     *
     * o == null => []
     * o is a list => o
     * else => [o]
     *
     * @param c the expected element class; used only to bind the type parameter
     * @param o the object to view as a list; may be null
     * @return a list view of {@code o}; the element type is not verified
     */
    @SuppressWarnings("unchecked") // element type is asserted by the caller, not checked here
    private static <T> List<T> toList(final Class<T> c, final Object o) {
        if ( o == null ) return Collections.emptyList();
        else if ( o instanceof List ) return (List<T>)o;
        else return Collections.singletonList((T)o);
    }

    /**
     * Encoder for String and Character fields. The value is written as a
     * single string of fixed type CHAR, so the field count is the length of
     * that string rather than the number of list elements.
     */
    public static class StringOrCharacter extends BCF2FieldEncoder {
        public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
            super(headerLine, encoder, dict, BCF2Type.CHAR);
        }

        /** Writes the (collapsed) string for {@code value}; writes nothing when it is null. */
        @Override
        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
            if ( value != null ) {
                final String s = encodeString(value);
                encoder.encodeString(s, s.length());
            }
        }

        /** @return 0 for null, otherwise the length of the string that will be written */
        @Override
        public int getBCFFieldCount(final VariantContext vc, final Object value) {
            return value == null ? 0 : encodeString(value).length();
        }

        /** Collapses a list of strings via BCF2Utils.collapseStringList; any other value must already be a String. */
        @SuppressWarnings("unchecked") // list values of string fields are expected to hold Strings
        private String encodeString(final Object value) {
            return value instanceof List ? BCF2Utils.collapseStringList((List<String>)value) : (String)value;
        }
    }

    /**
     * Encoder for Flag fields. The header must declare a count of 0; the
     * presence of the flag is written as the single INT8 value 1.
     */
    public static class Flag extends BCF2FieldEncoder {
        public Flag(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
            super(headerLine, encoder, dict, BCF2Type.INT8);
            if ( getHeaderLine().getCount() != 0 )
                throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
        }

        @Override
        public int getFixedCount() {
            return 1; // the header says 0 but we will write 1 value
        }

        /** Always writes the value 1, regardless of {@code value}. */
        @Override
        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
            encoder.encodePrimitive(1, getFixedType());
        }
    }

    /** Encoder for Float fields holding a Double or a list of Doubles; fixed type FLOAT. */
    public static class Float extends BCF2FieldEncoder {
        public Float(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
            super(headerLine, encoder, dict, BCF2Type.FLOAT);
        }

        @Override
        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
            final List<Double> doubles = toList(Double.class, value);
            for ( final double d : doubles )
                encoder.encodeRawFloat(d);
        }
    }

    /** Dynamically typed encoder for Integer fields whose value is an {@code int[]}. */
    public static class IntArray extends BCF2FieldEncoder {
        public IntArray(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
            super(headerLine, encoder, dict, null);
        }

        /** @return INT8 for null, otherwise the integer type determined from the array */
        @Override
        public BCF2Type getDynamicType(final Object value) {
            return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
        }

        /** Writes every array element; {@code value} must be a non-null {@code int[]}. */
        @Override
        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
            for ( final int i : (int[])value )
                encoder.encodeRawInt(i, type);
        }
    }

    /** Dynamically typed encoder for Integer fields whose value is an Integer or a list of Integers. */
    public static class IntList extends BCF2FieldEncoder {
        public IntList(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
            super(headerLine, encoder, dict, null);
        }

        /** @return INT8 for null, otherwise the integer type determined from the list view of the value */
        @Override
        public BCF2Type getDynamicType(final Object value) {
            return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType(toList(Integer.class, value));
        }

        @Override
        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
            for ( final int i : toList(Integer.class, value) )
                encoder.encodeRawInt(i, type);
        }
    }

    /** Dynamically typed encoder for Integer fields holding exactly one Integer. */
    public static class AtomicInt extends BCF2FieldEncoder {
        public AtomicInt(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
            super(headerLine, encoder, dict, null);
        }

        /** @return INT8 for null, otherwise the integer type determined from the value */
        @Override
        public BCF2Type getDynamicType(final Object value) {
            return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
        }

        /** Writes the single integer; {@code value} must be a non-null Integer. */
        @Override
        public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
            encoder.encodeRawInt((Integer)value, type);
        }
    }
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext.writer;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.IOException;
/**
 * Base class for objects that write one header field of a VariantContext
 * into a BCF2 stream, delegating the formatting of the value itself to a
 * {@link BCF2FieldEncoder}.
 *
 * <p>A field is written by calling {@link #start}, then the level-specific
 * method (for example {@link SiteWriter#site}), then {@link #done}.</p>
 */
public abstract class BCF2FieldWriter {
    private final BCF2FieldEncoder fieldEncoder;

    /**
     * @param fieldEncoder the encoder used to format this field's values
     */
    protected BCF2FieldWriter(final BCF2FieldEncoder fieldEncoder) {
        this.fieldEncoder = fieldEncoder;
    }

    /** @return the encoder supplied at construction */
    protected BCF2FieldEncoder getFieldEncoder() {
        return fieldEncoder;
    }

    /**
     * Begins the field by writing its dictionary offset as a typed value.
     *
     * @param encoder the destination encoder
     * @param vc      the variant context being written; unused here
     * @throws IOException if the encoder fails to write
     */
    public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
        final int offset = fieldEncoder.getDictionaryOffset();
        final BCF2Type offsetType = fieldEncoder.getDictionaryOffsetType();
        encoder.encodeTyped(offset, offsetType);
    }

    /**
     * Finishes the field. The base implementation does nothing.
     *
     * @param encoder the destination encoder
     * @param vc      the variant context being written
     * @throws IOException declared for the benefit of overriding classes
     */
    public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
        // nothing to do by default
    }

    @Override
    public String toString() {
        return String.format("BCF2FieldWriter %s with encoder %s", getClass().getSimpleName(), getFieldEncoder());
    }

    /** A field writer that emits a value at the site (INFO) level. */
    public static abstract class SiteWriter extends BCF2FieldWriter {
        protected SiteWriter(final BCF2FieldEncoder fieldEncoder) {
            super(fieldEncoder);
        }

        /**
         * Writes this field's value for {@code vc}.
         *
         * @param encoder the destination encoder
         * @param vc      the variant context holding the attribute
         * @throws IOException if the encoder fails to write
         */
        public abstract void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException;
    }

    /**
     * Site writer that works with any field encoder: it writes the type
     * descriptor (element count and type) followed by the encoded value.
     */
    public static class GenericSiteWriter extends SiteWriter {
        public GenericSiteWriter(final BCF2FieldEncoder fieldEncoder) {
            super(fieldEncoder);
        }

        @Override
        public void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
            final BCF2FieldEncoder fe = getFieldEncoder();
            final Object attribute = vc.getAttribute(fe.getField(), null);
            final BCF2Type bcfType = fe.getType(attribute);

            if ( attribute == null ) {
                // missing value: emit a zero-length type descriptor and no payload
                encoder.encodeType(0, bcfType);
                return;
            }

            final int nValues = fe.getBCFFieldCount(vc, attribute);
            encoder.encodeType(nValues, bcfType);
            fe.encodeValue(encoder, attribute, bcfType);
        }
    }
}

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext.writer;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.HashMap;
import java.util.Map;
/**
 * Builds and holds the {@link BCF2FieldWriter}s for the fields declared in a
 * VCF header, keyed by field ID.
 *
 * <p>Only INFO header lines are handled: {@link #setup} installs one site
 * writer per INFO line, and {@link #getSiteFieldWriter} looks them up.</p>
 */
public class BCF2FieldWriterManager {
    final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);

    final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();

    public BCF2FieldWriterManager() { }

    /**
     * Installs a site writer for every INFO line found in {@code header}.
     *
     * @param header     the VCF header whose metadata lines are scanned
     * @param encoder    the BCF2 encoder handed to each field encoder
     * @param dictionary map from header string to BCF2 dictionary offset
     */
    public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> dictionary) {
        for ( final VCFHeaderLine line : header.getMetaData() ) {
            if ( ! (line instanceof VCFInfoHeaderLine) )
                continue; // only INFO fields get site writers

            final VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine) line;
            final String id = infoLine.getID();
            final BCF2FieldWriter.SiteWriter installed = createInfoWriter(infoLine, encoder, dictionary);
            logger.info("Installing for field " + id + " field writer " + installed);
            siteWriters.put(id, installed);
        }
    }

    /** Wraps the encoder chosen for {@code line} in a generic site writer. */
    private BCF2FieldWriter.SiteWriter createInfoWriter(final VCFInfoHeaderLine line, final BCF2Encoder encoder, final Map<String, Integer> dict) {
        return new BCF2FieldWriter.GenericSiteWriter(createInfoEncoder(line, encoder, dict));
    }

    /**
     * Chooses the field encoder matching the declared type of {@code line};
     * Integer fields with a fixed count of exactly 1 get the atomic encoder.
     *
     * @throws ReviewedStingException if the header type is not one of the handled types
     */
    private BCF2FieldEncoder createInfoEncoder(final VCFInfoHeaderLine line, final BCF2Encoder encoder, final Map<String, Integer> dict) {
        switch ( line.getType() ) {
            case Character:
            case String:
                return new BCF2FieldEncoder.StringOrCharacter(line, encoder, dict);
            case Flag:
                return new BCF2FieldEncoder.Flag(line, encoder, dict);
            case Float:
                return new BCF2FieldEncoder.Float(line, encoder, dict);
            case Integer: {
                final boolean singleInt = line.getCountType() == VCFHeaderLineCount.INTEGER && line.getCount() == 1;
                if ( singleInt )
                    return new BCF2FieldEncoder.AtomicInt(line, encoder, dict);
                return new BCF2FieldEncoder.IntList(line, encoder, dict);
            }
            default:
                throw new ReviewedStingException("Unexpected type for field " + line.getID());
        }
    }

    /**
     * Looks up the site writer installed for {@code key}.
     *
     * @param key the field ID
     * @return the writer installed for that field; never null
     * @throws ReviewedStingException if no writer was installed for {@code key}
     */
    public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String key) {
        final BCF2FieldWriter.SiteWriter found = siteWriters.get(key);
        if ( found != null )
            return found;
        throw new ReviewedStingException("BUG: no writer found for " + key);
    }
}

View File

@ -51,6 +51,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
@ -80,6 +81,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
stringDictionaryMap.put(dict.get(i), i);
}
// setup the field encodings
fieldManager.setup(header, encoder, stringDictionaryMap);
try {
// write out the header into a byte stream, get its length, and write everything to the file
final ByteArrayOutputStream capture = new ByteArrayOutputStream();
@ -225,10 +229,15 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildInfo( VariantContext vc ) throws IOException {
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
final String key = infoFieldEntry.getKey();
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(key, infoFieldEntry.getValue());
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
writer.start(encoder, vc);
writer.site(encoder, vc);
writer.done(encoder, vc);
encodeStringByRef(key);
encoder.encodeTyped(encoding.valuesToEncode, encoding.BCF2Type);
// the old way of doing things
// final VCFToBCFEncoding encoding = prepFieldValueForEncoding(key, infoFieldEntry.getValue());
// encodeStringByRef(key);
// encoder.encodeTyped(encoding.valuesToEncode, encoding.BCF2Type);
}
}
@ -278,9 +287,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
BCF2Type intType;
if ( isList ) {
l = (List<Integer>)value;
intType = encoder.determineIntegerType(l);
intType = BCF2Utils.determineIntegerType(l);
} else if ( value != null ) {
intType = encoder.determineIntegerType((Integer)value);
intType = BCF2Utils.determineIntegerType((Integer) value);
l = Collections.singletonList((Integer)value);
} else {
intType = BCF2Type.INT8;
@ -417,7 +426,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
}
// determine the best size
final BCF2Type type = encoder.determineIntegerType(allPLs);
final BCF2Type type = BCF2Utils.determineIntegerType(allPLs);
startGenotypeField(field, numPLs, type);
for ( int pl : allPLs )
encoder.encodePrimitive(pl == -1 ? type.getMissingBytes() : pl, type);
@ -495,7 +504,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
private final BCF2Type encodeStringByRef(final String string) throws IOException {
final Integer offset = stringDictionaryMap.get(string);
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
final BCF2Type type = encoder.determineIntegerType(offset);
final BCF2Type type = BCF2Utils.determineIntegerType(offset);
encoder.encodeTyped(offset, type);
return type;
}
@ -516,7 +525,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
offsets.add(offset);
if ( maxType != BCF2Type.INT32) { // don't bother looking if we already are at 32 bit ints
final BCF2Type type1 = encoder.determineIntegerType(offset);
final BCF2Type type1 = BCF2Utils.determineIntegerType(offset);
switch ( type1 ) {
case INT8: break;
case INT16: if ( maxType == BCF2Type.INT8 ) maxType = BCF2Type.INT16; break;

View File

@ -87,8 +87,10 @@ public abstract class BaseTest {
private static final String networkTempDir;
private static final File networkTempDirFile;
public static final File testDirFile = new File("public/testdata/");
protected static final String testDirRelative = "public/testdata/";
public static final File testDirFile = new File(testDirRelative);
public static final String testDir = testDirFile.getAbsolutePath() + "/";
protected static final String testDirRoot = testDirFile.getPath().replace(testDirRelative, "");
public static final String keysDataLocation = validationDataLocation + "keys/";
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";

View File

@ -354,7 +354,9 @@ public class WalkerTest extends BaseTest {
final String now = new SimpleDateFormat("HH:mm:ss").format(new Date());
final String cmdline = Utils.join(" ",command);
System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s", now, name, cmdline));
BaseTest.log(cmdline); // also write the command line to the HTML log for convenient follow-up
// also write the command line to the HTML log for convenient follow-up
// do the replaceAll so paths become relative to the current
BaseTest.log(cmdline.replaceAll(testDirRoot, ""));
CommandLineExecutable.start(instance, command);
} catch (Exception e) {
gotAnException = true;

View File

@ -55,6 +55,8 @@ public class VariantContextTestProvider {
final private static boolean ENABLE_PLOIDY_TESTS = true;
final private static boolean ENABLE_PL_TESTS = true;
final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = false;
private static VCFHeader syntheticHeader;
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
private static VariantContext ROOT;
@ -160,6 +162,7 @@ public class VariantContextTestProvider {
metaData.add(new VCFInfoHeaderLine("STRING1", 1, VCFHeaderLineType.String, "x"));
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
metaData.add(new VCFInfoHeaderLine("VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "x"));
metaData.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
metaData.add(new VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality"));
@ -180,7 +183,7 @@ public class VariantContextTestProvider {
metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
metaData.add(new VCFInfoHeaderLine("FLOAT1", 1, VCFHeaderLineType.Float, "x"));
metaData.add(new VCFInfoHeaderLine("FLOAT3", 3, VCFHeaderLineType.Float, "x"));
metaData.add(new VCFInfoHeaderLine("FLAG", 1, VCFHeaderLineType.Flag, "x"));
metaData.add(new VCFInfoHeaderLine("FLAG", 0, VCFHeaderLineType.Flag, "x"));
syntheticHeader = new VCFHeader(metaData);
}
@ -246,6 +249,11 @@ public class VariantContextTestProvider {
add(builder().attribute("STRING3", null));
add(builder().attribute("STRING20", Arrays.asList("s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20")));
add(builder().attribute("VAR.INFO.STRING", "s1"));
add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2")));
add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2", "s3")));
add(builder().attribute("VAR.INFO.STRING", null));
addGenotypesToTestData();
addComplexGenotypesTest();
@ -390,51 +398,53 @@ public class VariantContextTestProvider {
attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0),
attr("g2", ref, "FLOAT3"));
//
//
// TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
//
//
addGenotypeTests(site,
attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
if (ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS) {
//
//
// TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
//
//
addGenotypeTests(site,
attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
addGenotypeTests(site, // g1 is missing the string, and g2 is missing FLOAT1
attr("g1", ref, "FLOAT1", 1.0),
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
addGenotypeTests(site, // g1 is missing the string, and g2 is missing FLOAT1
attr("g1", ref, "FLOAT1", 1.0),
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
// variable sized lists
addGenotypeTests(site,
attr("g1", ref, "GV", Arrays.asList("S1")),
attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
// variable sized lists
addGenotypeTests(site,
attr("g1", ref, "GV", Arrays.asList("S1")),
attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
addGenotypeTests(site,
attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
addGenotypeTests(site,
attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
addGenotypeTests(site, // missing value in varlist of string
attr("g1", ref, "FLOAT1", 1.0),
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
addGenotypeTests(site, // missing value in varlist of string
attr("g1", ref, "FLOAT1", 1.0),
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
//
//
// TESTING GENOTYPE FILTERS
//
//
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make());
//
//
// TESTING GENOTYPE FILTERS
//
//
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make());
}
// TODO -- test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
}