First step in major cleanup/redo of VCF functionality. Specifically, now:

a) The VCF track name works again with 3.3 or 4.0 VCFs when specifying -B name,VCF,file. The code reads the header and automatically parses the version.
b) Old VCF codec is deprecated. Reader goes now direct from parsing VCF lines into producing VariantContext objects, with no intermediate VCF records. If anyone can't resist the urge to still input files using the old method, a new VCF3Codec is in place with the old code, but it will be eventually deleted.
c) VCF headers and VCF info fields no longer keep track of the version. They are parsed into an internal representation and will be output only in VCF4.0 format.
d) As a consequence, the existing GATK bug where files are produced with VCF4 body but VCF3.3 headers is solved.
e) Several VCF 4.0 writer bugs are now solved.
f) Integration test MD5's are changed, mostly because of corrected VCF4.0 headers and because validation data mostly uses now VCF4.0.
g) Several VCF files in the ValidationData/ directory have been converted to VCF 4.0 format. I kept the old versions, and the new versions have a .vcf4 extension.

Pending issues:
a) We are still not dealing with indels consistently or correctly when representing them. This will be a second part of the changes.
b) The VCF writer doesn't use VCFRecord but it does still use a lot of leftovers like VCFGenotypeEncoding, VCFGenotypeRecord, etc. This needs to be simplified and cleaned.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3813 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-07-16 22:49:16 +00:00
parent 75bea4881a
commit 55b756f1cc
24 changed files with 339 additions and 327 deletions

View File

@ -0,0 +1,124 @@
package org.broad.tribble.vcf;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.LineReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Feature codec for the legacy VCF 3.x line format (deprecated).
 *
 * Parses VCF 3.x lines into VCFRecord objects via VCFReaderUtils. Kept only for
 * backward compatibility with old inputs; new code should use the VCF 4.0 codec,
 * which decodes directly into VariantContext objects.
 */
public class VCF3Codec implements FeatureCodec {

    // we have to store the list of strings that make up the header until they're needed
    private List<String> headerStrings = new ArrayList<String>();

    // the parsed VCF header; null until readHeader() has succeeded
    private VCFHeader header = null;

    // the header version assumed when parsing; defaults to VCF 3.3
    private VCFHeaderVersion version = VCFHeaderVersion.VCF3_3;

    // some classes need to transform the line before it is decoded (may be null)
    private LineTransform transformer = null;

    /**
     * Fast path to get the location of the Feature for indexing.
     *
     * @param line the input line to decode
     * @return a Feature carrying location information, or null if the line is a
     *         header/comment line (starts with '#')
     */
    public Feature decodeLoc(String line) {
        return reallyDecode(line, true);
    }

    /**
     * Decode a line as a Feature.
     *
     * @param line the input line to decode
     *
     * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is
     * a comment)
     */
    public Feature decode(String line) {
        return reallyDecode(line, false);
    }

    /**
     * Shared implementation behind decode() and decodeLoc().
     *
     * @param line               the input line
     * @param justLocationPlease if true, only location information is required
     * @return the decoded record, or null for header/comment lines
     */
    private Feature reallyDecode(String line, boolean justLocationPlease ) {
        // transform the line, if we have a transform to do
        if (transformer != null) line = transformer.lineTransform(line);

        // header and comment lines are not features
        if (line.startsWith("#"))
            return null;

        // make a VCFRecord of the line and return it
        VCFRecord rec = VCFReaderUtils.createRecord(line, header, justLocationPlease);
        if ( ! justLocationPlease ) rec.setHeader(header);
        return rec;
    }

    /**
     * Read and parse the header from the given reader. All "##" meta-data lines are
     * buffered until the required field line (starting with a single '#') is seen,
     * at which point the header is built.
     *
     * @param reader the line reader positioned at the start of the file
     * @return the number of header lines consumed
     * @throws CodecLineParsingException if the required field header line is never seen
     */
    public int readHeader(LineReader reader) {
        String line = "";
        try {
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("##")) {
                    // meta-data line: buffer it until the header can be constructed
                    headerStrings.add(line);
                }
                else if (line.startsWith("#")) {
                    // the field header line terminates the header section
                    headerStrings.add(line);
                    header = VCFReaderUtils.createHeader(headerStrings,version);
                    return headerStrings.size();
                }
                else {
                    // a data line before the field header line means the header is malformed
                    throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("IO Exception ", e);
        }
        // reached EOF without ever seeing the field header line
        throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
    }

    /**
     * @return VCFRecord.class, the Feature type produced by this codec
     */
    public Class getFeatureType() {
        return VCFRecord.class;
    }

    /**
     * Return the parsed header, checked against the requested type.
     *
     * @param clazz the expected header class; must be VCFHeader.class
     * @return the header parsed by readHeader(), or null if readHeader() has not been called
     * @throws ClassCastException if clazz is not VCFHeader.class
     */
    public VCFHeader getHeader(Class clazz) throws ClassCastException {
        if (!clazz.equals(VCFHeader.class))
            throw new ClassCastException("Unable to cast to expected type " + clazz + " from type " + VCFHeader.class);
        return header;
    }

    /** Hook allowing callers to rewrite each line before it is decoded. */
    public static interface LineTransform {
        public String lineTransform(String line);
    }

    /** @return the current line transformer, or null if none is set */
    public LineTransform getTransformer() {
        return transformer;
    }

    /** @param transformer the transformer to apply to each line before decoding */
    public void setTransformer(LineTransform transformer) {
        this.transformer = transformer;
    }

    /** @return the VCF header version this codec assumes */
    public VCFHeaderVersion getVersion() {
        return version;
    }

    /** @param version the VCF header version to assume when parsing */
    public void setVersion(VCFHeaderVersion version) {
        this.version = version;
    }
}

View File

@ -4,6 +4,7 @@ import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.LineReader;
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
import java.io.IOException;
import java.util.ArrayList;
@ -18,110 +19,4 @@ import java.util.List;
*
* The codec for VCF, which relies on VCFReaderUtils to do most of the processing
*/
public class VCFCodec implements FeatureCodec {
// we have to store the list of strings that make up the header until they're needed
private List<String> headerStrings = new ArrayList<String>();
private VCFHeader header = null;
private VCFHeaderVersion version = VCFHeaderVersion.VCF3_3;
// some classes need to transform the line before
private LineTransform transformer = null;
/**
* Fast path to get the location of the Feature for indexing
* @param line the input line to decode
* @return
*/
public Feature decodeLoc(String line) {
return reallyDecode(line, true);
}
/**
* Decode a line as a Feature.
*
* @param line
*
* @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is
* a comment)
*/
public Feature decode(String line) {
return reallyDecode(line, false);
}
private Feature reallyDecode(String line, boolean justLocationPlease ) {
// transform the line, if we have a transform to do
if (transformer != null) line = transformer.lineTransform(line);
if (line.startsWith("#"))
return null;
// make a VCFRecord of the line and return it
VCFRecord rec = VCFReaderUtils.createRecord(line, header, justLocationPlease);
if ( ! justLocationPlease ) rec.setHeader(header);
return rec;
}
/**
* Return the # of header lines for this file. We use this to parse out the header
*
* @return 0
*/
public int readHeader(LineReader reader) {
String line = "";
try {
while ((line = reader.readLine()) != null) {
if (line.startsWith("##")) {
if ( line.startsWith("##fileformat") && ! line.startsWith("##fileformat=VCFv3" ) )
throw new CodecLineParsingException("VCF codec can only parse VCF3 formated files. Your version line is " + line + ". If you want to parse VCF4, use VCF4 use VCF as the rod type");
headerStrings.add(line);
}
else if (line.startsWith("#")) {
headerStrings.add(line);
header = VCFReaderUtils.createHeader(headerStrings,version);
return headerStrings.size();
}
else {
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
}
}
} catch (IOException e) {
throw new RuntimeException("IO Exception ", e);
}
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
}
/**
* @return VCFRecord.class
*/
public Class getFeatureType() {
return VCFRecord.class;
}
public VCFHeader getHeader(Class clazz) throws ClassCastException {
if (!clazz.equals(VCFHeader.class))
throw new ClassCastException("Unable to cast to expected type " + clazz + " from type " + VCFHeader.class);
return header;
}
public static interface LineTransform {
public String lineTransform(String line);
}
public LineTransform getTransformer() {
return transformer;
}
public void setTransformer(LineTransform transformer) {
this.transformer = transformer;
}
public VCFHeaderVersion getVersion() {
return version;
}
public void setVersion(VCFHeaderVersion version) {
this.version = version;
}
}
public class VCFCodec extends VCF4Codec {}

View File

@ -82,7 +82,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
}
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType, VCFHeaderVersion version) {
super(lineType.toString(), "", version);
super(lineType.toString(), "");
this.name = name;
this.count = count;
this.type = type;
@ -98,7 +98,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
*
*/
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
super(lineType.toString(), "", version);
super(lineType.toString(), "");
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
name = mapping.get("ID");
count = version == VCFHeaderVersion.VCF4_0 ?

View File

@ -34,22 +34,17 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
* @param version the vcf header version
*/
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
super("FILTER", "", version);
super("FILTER", "");
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
name = mapping.get("ID");
description = mapping.get("Description");
}
protected String toStringEncoding() {
if (mVersion == VCFHeaderVersion.VCF3_3 || mVersion == VCFHeaderVersion.VCF3_2)
return String.format("FILTER=%s,\"%s\"", name, description);
else if (mVersion == VCFHeaderVersion.VCF4_0) {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Description", description);
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
}
else throw new RuntimeException("Unsupported VCFVersion " + mVersion);
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Description", description);
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
}
public boolean equals(Object o) {

View File

@ -30,9 +30,6 @@ public class VCFHeader {
// the header string indicator
public static final String HEADER_INDICATOR = "#";
// our header version
private VCFHeaderVersion version;
/** do we have genotyping data? */
private boolean hasGenotypingData = false;
@ -70,7 +67,6 @@ public class VCFHeader {
List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
for ( VCFHeaderLine line : mMetaData )
if ( VCFHeaderVersion.isFormatString(line.getKey())) {
version = VCFHeaderVersion.toHeaderVersion(line.getValue(),line.getKey());
toRemove.add(line);
}
// remove old header lines for now,
@ -98,10 +94,7 @@ public class VCFHeader {
*/
public Set<VCFHeaderLine> getMetaData() {
Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
if (version == null)
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString()));
else
lines.add(new VCFHeaderLine(version.getFormatString(), version.getVersionString()));
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
lines.addAll(mMetaData);
return lines;
}
@ -129,19 +122,6 @@ public class VCFHeader {
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
}
/**
* convert the header to a new VCF version
* @param version the version to convert to
*/
public void setVersion(VCFHeaderVersion version) {
if (version.equals(this.version))
return; // we're all set, do nothing
// store the new version, and update each of the header lines
this.version = version;
for (VCFHeaderLine line : mMetaData)
line.setVersion(version);
}
}

View File

@ -43,19 +43,6 @@ public class VCFHeaderLine implements Comparable {
private String mKey = null;
private String mValue = null;
protected VCFHeaderVersion mVersion = null;
/**
* create a VCF header line
*
* @param key the key for this header line
* @param value the value for this header line
*/
public VCFHeaderLine(String key, String value, VCFHeaderVersion version) {
mKey = key;
mValue = value;
mVersion = version;
}
/**
* create a VCF header line
@ -66,7 +53,6 @@ public class VCFHeaderLine implements Comparable {
public VCFHeaderLine(String key, String value) {
mKey = key;
mValue = value;
mVersion = VCFHeaderVersion.VCF3_3;
}
/**
@ -127,18 +113,6 @@ public class VCFHeaderLine implements Comparable {
return toString().compareTo(other.toString());
}
/**
* set the version string, which resets the current stored string representation if the version changed
* @param version
*/
public void setVersion(VCFHeaderVersion version) {
if (!version.equals(this.mVersion)) this.stringRep = null;
this.mVersion = version;
}
public VCFHeaderVersion getVersion() {
return mVersion;
}
/**
* create a string of a mapping pair for the target VCF version

View File

@ -17,7 +17,7 @@ public class VCFReaderUtils {
* package protected so that the VCFReaderUtils can access this function
*
* @param headerStrings a list of header strings
*
* @param version Header version to parse
* @return a VCF Header created from the list of strings
*/
public static VCFHeader createHeader(List<String> headerStrings, VCFHeaderVersion version) {
@ -53,7 +53,7 @@ public class VCFReaderUtils {
else {
int equals = str.indexOf("=");
if ( equals != -1 )
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1),version));
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
}
}
}

View File

@ -25,12 +25,11 @@ import java.util.*;
*/
public class VCF4Codec implements FeatureCodec, NameAwareCodec {
// a variant context flag for original allele strings
public static final String ORIGINAL_ALLELE_LIST = "ORIGINAL_ALLELE_LIST";
// we have to store the list of strings that make up the header until they're needed
private VCFHeader header = null;
private VCFHeaderVersion version = VCFHeaderVersion.VCF4_0;
// used to convert the index of the alternate allele in genotypes to a integer index
private static int ZERO_CHAR = (byte)'0';
@ -66,6 +65,9 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
private int lineNo = 0;
// some classes need to transform the line before
private LineTransform transformer = null;
/**
* this method is a big hack, since I haven't gotten to updating the VCF header for the 4.0 updates
* @param reader the line reader to take header lines from
@ -77,14 +79,22 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
String line = "";
try {
boolean foundHeaderVersion = false;
while ((line = reader.readLine()) != null) {
lineNo++;
if (line.startsWith("##")) {
if ( line.startsWith("##fileformat") && ! line.startsWith("##fileformat=VCFv4" ) )
throw new CodecLineParsingException("VCF4 codec can only parse VCF4 formated files. Your version line is " + line + ". If you want VCF3 parsing, use VCF as the rod type.");
String[] lineFields = line.substring(2).split("=");
if (lineFields.length == 2 &&
VCFHeaderVersion.isVersionString(lineFields[1]) && VCFHeaderVersion.isFormatString(lineFields[0])) {
foundHeaderVersion = true;
this.version = VCFHeaderVersion.toHeaderVersion(lineFields[1]);
}
headerStrings.add(line);
}
else if (line.startsWith("#")) {
if (!foundHeaderVersion) {
throw new CodecLineParsingException("We never saw a header line specifying VCF version");
}
return createHeader(headerStrings, line);
}
else {
@ -107,7 +117,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
*/
public int createHeader(List<String> headerStrings, String line) {
headerStrings.add(line);
header = VCFReaderUtils.createHeader(headerStrings, VCFHeaderVersion.VCF4_0);
header = VCFReaderUtils.createHeader(headerStrings, this.version);
// load the parsing fields
Set<VCFHeaderLine> headerLines = header.getMetaData();
@ -226,12 +236,14 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
String[] split = str.split(",");
for (String substring : split) {
VCFHeaderLineType type = infoFields.get(key);
objects.add(type != null ? type.convert(substring,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : substring);
// objects.add(type != null ? type.convert(substring,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : substring);
objects.add(substring);
}
value = objects;
} else {
VCFHeaderLineType type = infoFields.get(key);
value = type != null ? type.convert(str,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : str;
//value = type != null ? type.convert(str,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : str;
value = str;
}
//System.out.printf("%s %s%n", key, value);
} else {
@ -269,17 +281,16 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
* @param qualString the quality string
* @return return a double
*/
private static Double parseQual(String qualString) {
// todo -- remove double once we deal with annoying VCFs from 1KG
if ( qualString.equals(".") )
return VariantContext.NO_NEG_LOG_10PERROR;
else {
double q = Double.valueOf(qualString);
if ( q == -1 )
return VariantContext.NO_NEG_LOG_10PERROR;
else
return Double.valueOf(qualString) / 10;
}
private Double parseQual(String qualString) {
if (qualString.equals(VCFConstants.MISSING_VALUE_v4))
return VariantContext.NO_NEG_LOG_10PERROR;
else {
double q = Double.valueOf(qualString);
if ( q == -1 )
return VariantContext.NO_NEG_LOG_10PERROR;
else
return Double.valueOf(qualString) / 10;
}
}
/**
@ -346,7 +357,14 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
Set<String> fFields;
// a PASS is simple (no filters)
if ( filterString.equals("PASS") ) {
String passString = VCFConstants.PASSES_FILTERS_v3;
if (this.version == VCFHeaderVersion.VCF4_0)
passString = VCFConstants.PASSES_FILTERS_v4;
if ( filterString.equals(passString) ) {
return null;
}
if ( filterString.equals(VCFConstants.UNFILTERED)) {
return null;
}
// else do we have the filter string cached?
@ -373,6 +391,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
* parse out the VCF line
*
* @param parts the parts split up
* @param parseGenotypes whether to parse genotypes or not
* @return a variant context object
*/
private VariantContext parseVCFLine(String[] parts, boolean parseGenotypes) {
@ -398,7 +417,8 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
// find out our current location, and clip the alleles down to their minimum length
Pair<GenomeLoc, List<Allele>> locAndAlleles;
if ( !isSingleNucleotideEvent(alleles) ) {
attributes.put(ORIGINAL_ALLELE_LIST,alleles);
if (this.version != VCFHeaderVersion.VCF4_0)
throw new VCFParserException("Saw Indel/non SNP event on a VCF 3.3 or earlier file. Please convert file to VCF4.0 with VCFTools before using the GATK on it");
locAndAlleles = clipAlleles(contig, pos, ref, alleles);
} else {
locAndAlleles = new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
@ -469,8 +489,14 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
if (nGTKeys >= 1) {
gtAttributes = new HashMap<String, String>(nGTKeys - 1);
for (int i = 0; i < nGTKeys; i++) {
if (i >= GTValueSplitSize)
gtAttributes.put(genotypeKeyArray[i],".");
if (i >= GTValueSplitSize) {
if (genotypeKeyArray[i].equals("GQ"))
GTQual = parseQual(VCFConstants.MISSING_VALUE_v4);
else if (genotypeKeyArray[i].equals("FT")) // deal with genotype filters here
genotypeFilters = parseFilters(VCFConstants.MISSING_VALUE_v4);
else
gtAttributes.put(genotypeKeyArray[i],VCFConstants.MISSING_VALUE_v4);
}
else if (genotypeKeyArray[i].equals("GT"))
if (i != 0)
throw new VCFParserException("Saw GT at position " + i + ", it must be at the first position for genotypes. At location = " + locAndAlleles.first);
@ -480,9 +506,11 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
GTQual = parseQual(GTValueArray[i]);
else if (genotypeKeyArray[i].equals("FT")) // deal with genotype filters here
genotypeFilters = parseFilters(GTValueArray[i]);
else
else {
if (this.version != VCFHeaderVersion.VCF4_0 && GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3))
GTValueArray[i] = VCFConstants.MISSING_VALUE_v4;
gtAttributes.put(genotypeKeyArray[i], GTValueArray[i]);
}
}
// validate the format fields
validateFields(gtAttributes.keySet(), new ArrayList(formatFields.keySet()));
@ -517,7 +545,22 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
static Pair<GenomeLoc,List<Allele>> clipAlleles(String contig, long position, String ref, List<Allele> unclippedAlleles) {
List<Allele> newAlleleList = new ArrayList<Allele>();
// find the preceding string common to all alleles and the reference
/* //+
boolean clipping = true;
int forwardClipping = 0;
while(clipping) {
for (Allele a : unclippedAlleles) {
if (a.length()-forwardClipping == 0)
clipping = false;
else if (a.getBases()[forwardClipping] != ref.getBytes()[forwardClipping])
clipping = false;
if (clipping) forwardClipping++;
}
}
//-
*/ // find the preceding string common to all alleles and the reference
boolean clipping = true;
for (Allele a : unclippedAlleles)
if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) {
@ -576,6 +619,19 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
return name;
}
public static interface LineTransform {
public String lineTransform(String line);
}
public LineTransform getTransformer() {
return transformer;
}
public void setTransformer(LineTransform transformer) {
this.transformer = transformer;
}
/**
* set the name of this codec
* @param name
@ -583,4 +639,6 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
public void setName(String name) {
this.name = name;
}
}

View File

@ -62,7 +62,7 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
protected static String line = null;
final TreeSet<String> samples = new TreeSet<String>();
VCF4Codec vcf4codec = new VCF4Codec();
VCFCodec vcf4codec = new VCFCodec();
/**
@ -86,32 +86,25 @@ public class VCF4WriterTestWalker extends RodWalker<Integer, Integer> {
for( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
if(rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) {
if (rod.getType().equals(vcf4codec.getClass())) {
try {
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath()));
int lineNumber = vcf4codec.readHeader(lineReader);
out.printf("Read %d header lines%n", lineNumber);
try {
AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath()));
int lineNumber = vcf4codec.readHeader(lineReader);
out.printf("Read %d header lines%n", lineNumber);
header = vcf4codec.getHeader(VCFHeader.class);
}
catch (FileNotFoundException e ) {
throw new StingException(e.getMessage());
}
} else {
final VCFReader reader = new VCFReader(rod.getFile());
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);
reader.close();
header = new VCFHeader(hInfo, samples);
header = vcf4codec.getHeader(VCFHeader.class);
}
catch (FileNotFoundException e ) {
throw new StingException(e.getMessage());
}
final Set<String> vcfSamples = header.getGenotypeSamples();
samples.addAll(vcfSamples);
}
}
if ( header != null )
header.setVersion(VCFHeaderVersion.VCF4_0);
vcfWriter.writeHeader(header);
}

View File

@ -86,10 +86,10 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
if ( vc_eval == null || vc_eval.isFiltered() )
return 0;
if (vc_eval.getType() != VariantContext.Type.SNP)
if (!vc_eval.isSNP())
return 0;
if (vc_eval.getAlleles().size()!= 2)
if (!vc_eval.isBiallelic())
return 0;
// output marker ID to Beagle input file

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -61,7 +62,7 @@ public class VCFUtils {
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getRecordType().equals(VCFRecord.class) ) {
if ( rod.getRecordType().equals(VariantContext.class) ) {
fields.addAll(rod.getHeader(VCFHeader.class).getMetaData());
}
}
@ -139,7 +140,7 @@ public class VCFUtils {
if ( map.containsKey(key) ) {
VCFHeaderLine other = map.get(key);
if ( line.equals(other) || line.getVersion() != VCFHeaderVersion.VCF4_0 ) // todo -- remove me when everything is 4
if ( line.equals(other) )
continue;
else if ( ! line.getClass().equals(other.getClass()) )
throw new IllegalStateException("Incompatible header types: " + line + " " + other );
@ -173,7 +174,6 @@ public class VCFUtils {
if ( logger != null ) logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other));
}
} else {
line.setVersion(VCFHeaderVersion.VCF4_0); // todo -- remove this when we finally have vcf3/4 unified headers
map.put(key, line);
//System.out.printf("Adding header line %s%n", line);
}

View File

@ -201,9 +201,6 @@ public class VCFWriter {
String paddingBases = "";
String trailingBases = "";
ArrayList<Allele> originalAlleles = (ArrayList)vc.getAttribute("ORIGINAL_ALLELE_LIST");
// search for reference allele and find trailing and padding at the end.
// See first if all alleles have a base encoding (ie no deletions)
// if so, add one common base to all alleles (reference at this location)
@ -217,50 +214,12 @@ public class VCFWriter {
refString = new String(a.getBases());
}
if (originalAlleles != null) {
// if original allele info was filled when reading from a VCF4 file,
// determine whether there was a padding base(s) at the beginning and end.
byte previousBase = 0;
int cnt=0;
boolean firstBaseCommonInAllAlleles = true;
Allele originalReferenceAllele = null;
for (Allele originalAllele : originalAlleles){
// if first base of allele is common to all of them, there may have been a common base deleted from all
byte firstBase = originalAllele.getBases()[0];
if (cnt > 0) {
if (firstBase != previousBase)
firstBaseCommonInAllAlleles = false;
}
previousBase = firstBase;
cnt++;
if (originalAllele.isReference())
originalReferenceAllele = originalAllele;
}
numTrailingBases = (firstBaseCommonInAllAlleles)? 1:0;
position -= numTrailingBases;
if (originalReferenceAllele == null)
throw new IllegalStateException("At least one Allele must be reference");
String originalRef = new String(originalReferenceAllele.getBases());
numPaddingBases = originalRef.length()-refString.length()-numTrailingBases;
if (numTrailingBases > 0) {
trailingBases = originalRef.substring(0,numTrailingBases);
}
if (numPaddingBases > 0)
paddingBases = originalRef.substring(originalRef.length()-numPaddingBases,originalRef.length());
}
else {
// Case where there is no original allele info, e.g. when reading from VCF3.3 or when vc was produced by the GATK.
if (!hasBasesInAllAlleles) {
trailingBases = new String(refBases);
numTrailingBases = 1;
position--;
}
// Case where there is no original allele info, e.g. when reading from VCF3.3 or when vc was produced by the GATK.
if (!hasBasesInAllAlleles) {
trailingBases = new String(refBases);
numTrailingBases = 1;
position--;
}
@ -295,6 +254,7 @@ public class VCFWriter {
// this needs to be done in case all samples are no-calls
vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
}
String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys);
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>(vc.getGenotypes().size());
@ -378,10 +338,6 @@ public class VCFWriter {
if ( key.equals("ID") )
continue;
// Original alleles are not for reporting but only for internal bookkeeping
if (key.equals("ORIGINAL_ALLELE_LIST"))
continue;
String outputValue = formatVCFField(key, elt.getValue());
if ( outputValue != null )
infoFields.put(key, outputValue);
@ -587,14 +543,21 @@ public class VCFWriter {
Set<String> keys = new HashSet<String>();
boolean sawGoodQual = false;
boolean sawGenotypeFilter = false;
for ( Genotype g : vc.getGenotypes().values() ) {
keys.addAll(g.getAttributes().keySet());
if ( g.hasNegLog10PError() )
sawGoodQual = true;
if (g.isFiltered() && g.isCalled())
sawGenotypeFilter = true;
}
if ( sawGoodQual )
keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
if (sawGenotypeFilter)
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
return Utils.sorted(new ArrayList<String>(keys));
}

View File

@ -205,20 +205,20 @@ public class VCF4UnitTest extends BaseTest {
String clippedAlleleLine = "20\t14370\trs6054257\tGGG\tG\t29\tPASS\tNS=3;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|1:48:1:51,51\t0|0:48:1:51,51\t0|0:48:1:51,51";
@Test
public void testClippedAllelesAddedAsAnnotation() {
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
// TODO - either modify or kill this test
/* TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
VariantContext context = (VariantContext)testSetup.codec.decode(clippedAlleleLine);
Assert.assertTrue(context.hasAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST));
List<Allele> alleles = (List<Allele>)context.getAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST);
Assert.assertEquals("Expected allele list of 2, got " + alleles.size(),2,alleles.size());
Assert.assertTrue(alleles.get(0).basesMatch("GGG"));
Assert.assertTrue(alleles.get(1).basesMatch("G"));
}
*/ }
// test that when we don't clip the alleles, we don't see the annotation
@Test
public void testNoClippedAllelesNoAddedAsAnnotation() {
TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
VariantContext context = (VariantContext)testSetup.codec.decode(twoFewInfoLine);
Assert.assertTrue(!context.hasAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST));
}
// test that we're getting the right genotype for a multi-base polymorphism

View File

@ -70,11 +70,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("b423141ca600d581dc73e9b3dff4f782");
md5.add("919eb499bfcc980a14825a0265e575e3");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +
" -B variant,VCF," + validationDataLocation + "complexExample.vcf" +
" -B variant,VCF," + validationDataLocation + "complexExample.vcf4" +
" -T VariantsToVCF" +
" -o %s",
1, // just one output file

View File

@ -16,9 +16,9 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
executeTest("testFastaReference", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B indels,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s",
"-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B indels,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s",
1,
Arrays.asList("3a48986c3832a768b478c3e95f994b0f"));
Arrays.asList("a7bfc673eaa202a668c1424a933671ad"));
executeTest("testFastaAlternateReferenceIndels", spec2);
WalkerTestSpec spec4 = new WalkerTestSpec(

View File

@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fa522e5b8c89c5a465ca03dd88bc2b8f"));
Arrays.asList("2385975931cd06fca452655bebf5c379"));
executeTest("test clustered SNPs", spec);
}
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4d8a184417289c325410a5e545d9fc80"));
Arrays.asList("6743efa09985206819adcf8eaf5ff936"));
executeTest("test mask", spec);
}
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("d48062f139d853a61e4f0bfb76bd695d"));
Arrays.asList("e054f57d3794ce8a57cae92f16886cf0"));
executeTest("test filter #1", spec);
}
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("791f89c6da7bdde79913754d9ddb72eb"));
Arrays.asList("db09a1f7ff523087ea0f6a56f3febfe7"));
executeTest("test filter #2", spec);
}
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("174caf880477f3edf68fe6722f227017"));
Arrays.asList("edbd505d8d55b4ba71f99d7006871db0"));
executeTest("test filter with separate names #2", spec);
}
}

View File

@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("9ebe61dcb5112e7e745412d7767d101a"));
Arrays.asList("e3f402fbbb6bbb4f60b1aa0549989d85"));
executeTest("testConfidence2", spec2);
}

View File

@ -29,9 +29,9 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testRealignerKnownsOnly() {
String[] md5s = {"7084d4e543bc756730ab306768028530", "1091436c40d5ba557d85662999cc0c1d"};
String[] md5s = {"83bc0c9a7d8872b552c6cbd994672c3b", "92bf331b672a03d63c26e4d9d3615f5b"};
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -LOD 1.0 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10076000 -compress 1 -targetIntervals " + validationDataLocation + "NA12878.indels.intervals -B knownIndels,VCF," + validationDataLocation + "NA12878.indels.vcf -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -knownsOnly",
"-T IndelRealigner -noPG -LOD 1.0 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10076000 -compress 1 -targetIntervals " + validationDataLocation + "NA12878.indels.intervals -B knownIndels,VCF," + validationDataLocation + "NA12878.indels.vcf4 -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -knownsOnly",
2,
Arrays.asList(md5s));
executeTest("test realigner known indels only", spec);

View File

@ -8,10 +8,10 @@ import java.util.Arrays;
public class PickSequenomProbesIntegrationTest extends WalkerTest {
@Test
public void testProbes() {
String testVCF = validationDataLocation + "complexExample.vcf";
String testVCF = validationDataLocation + "complexExample.vcf4";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("0f356354a4a78ff62b2848431ec11262"));
Arrays.asList("46d2acea95d36725db63af61ee963ce6"));
executeTest("Test probes", spec);
}

View File

@ -11,7 +11,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
String testPedFile = validationDataLocation + "Sequenom_Test_File.txt";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("2e273d400b4b69e39c34e465b200b192"));
Arrays.asList("2dab4630f40b76c0762de83fcbb60d09"));
executeTest("Test SNPs", spec);
}
@ -20,7 +20,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest {
String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped";
String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s";
WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1,
Arrays.asList("e15a63fc49ec25ebcae60a28a5f3f830"));
Arrays.asList("845b9a15ac947052ddded5b79228e5ec"));
executeTest("Test Indels", spec);
}
}

View File

@ -37,7 +37,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testVariantRecalibrator() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "1f7adb28007d77e65c02112480f56663" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "a1acb90f0695cbe33c290403113ac3e1" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();

View File

@ -35,9 +35,9 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
public void testComplexVCFPileup() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf" +
" -B eval2,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf"
" -B eval2,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4"
, 1,
Arrays.asList("c775c995c9fc09c66db51a694511d07b"));
Arrays.asList("216b5de6f58be6cf3286ed5261772904"));
executeTest("testComplexVCFPileup", spec);
}

View File

@ -41,7 +41,7 @@ public class VariantSelectIntegrationTest extends WalkerTest {
public void testVCFSelect1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
Arrays.asList("8b358e0cfa35de022a37360a6f28a839"));
Arrays.asList("8fd97a99174483920f84aaf54eeb9dd9"));
executeTest("testVCFSelect1", spec);
}
@ -49,7 +49,7 @@ public class VariantSelectIntegrationTest extends WalkerTest {
public void testVCFSelect2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -L 1:10001290-10048590 ", 1,
Arrays.asList("8e991b9d6d610c8f89c8557756fc8e34"));
Arrays.asList("ecb61d798ff5e0b003d2986500bf4462"));
executeTest("testVCFSelect2", spec);
}
@ -57,7 +57,7 @@ public class VariantSelectIntegrationTest extends WalkerTest {
public void testVCFSelectOr() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -match 'AF == 0.50' -L 1:10001290-10048590 ", 1,
Arrays.asList("7bd064c8d8bf5389fcd0b78a7c32b599"));
Arrays.asList("7c23412a0abe28057a78cd532b752019"));
executeTest("testVCFSelectOr", spec);
}
@ -65,7 +65,7 @@ public class VariantSelectIntegrationTest extends WalkerTest {
public void testVCFSelectAnd() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6 && AF == 0.50' -L 1:10001290-10048590 ", 1,
Arrays.asList("5af565836fa926feaa130715b93188bc"));
Arrays.asList("71cc18348a9d5de7270613bedb79880a"));
executeTest("testVCFSelectAnd", spec);
}
}

View File

@ -1,8 +1,13 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.Assert;
@ -10,7 +15,9 @@ import org.junit.Test;
import org.junit.BeforeClass;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import net.sf.picard.reference.IndexedFastaSequenceFile;
@ -34,30 +41,47 @@ public class VCFWriterUnitTest extends BaseTest {
GenomeLocParser.setupRefContigOrdering(seq);
}
@Test
public void emptyTest() {
}
/** test, using the writer and reader, that we can output and input a VCF file without problems */
//@Test
@Test
public void testBasicWriteAndRead() {
VCFHeader header = createFakeHeader(metaData,additionalColumns);
VCFWriter writer = new VCFWriter(fakeVCFFile);
writer.writeHeader(header);
writer.addRecord(createVCFRecord(header));
writer.addRecord(createVCFRecord(header));
writer.add(createVC(header),"A".getBytes());
writer.add(createVC(header),"A".getBytes());
writer.close();
VCFReader reader = new VCFReader(fakeVCFFile);
int counter = 0;
// validate what we're reading in
validateHeader(reader.getHeader());
for (VCFRecord rec : reader) {
counter++;
VCFCodec reader = new VCFCodec();
AsciiLineReader lineReader;
try {
lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile));
int lineNumber = reader.readHeader(lineReader);
}
Assert.assertEquals(2,counter);
reader.close();
new File(fakeVCFFile + TribbleRMDTrackBuilder.linearIndexExtension).delete();
//fakeVCFFile.delete();
catch (FileNotFoundException e ) {
throw new StingException(e.getMessage());
}
int counter = 0;
// validate what we're reading in
validateHeader(reader.getHeader(VCFHeader.class));
try {
while(true) {
String line = lineReader.readLine();
if (line == null)
break;
VariantContext vc = (VariantContext)reader.decode(line);
counter++;
}
Assert.assertEquals(2,counter);
new File(fakeVCFFile + TribbleRMDTrackBuilder.linearIndexExtension).delete();
fakeVCFFile.delete();
}
catch (IOException e ) {
throw new StingException(e.getMessage());
}
}
/**
@ -67,8 +91,8 @@ public class VCFWriterUnitTest extends BaseTest {
* @return a fake VCF header
*/
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString(),VCFHeaderVersion.VCF3_3));
metaData.add(new VCFHeaderLine("two", "2",VCFHeaderVersion.VCF3_3));
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
metaData.add(new VCFHeaderLine("two", "2"));
additionalColumns.add("FORMAT");
additionalColumns.add("extra1");
additionalColumns.add("extra2");
@ -80,23 +104,29 @@ public class VCFWriterUnitTest extends BaseTest {
* @param header the VCF header
* @return a VCFRecord
*/
private VCFRecord createVCFRecord(VCFHeader header) {
List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
altBases.add(new VCFGenotypeEncoding("C"));
altBases.add(new VCFGenotypeEncoding("D1"));
Map<String,String> infoFields = new HashMap<String,String>();
infoFields.put("DP","50");
private VariantContext createVC(VCFHeader header) {
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1",1);
List<Allele> alleles = new ArrayList<Allele>();
Set<String> filters = null;
Map<String, String> attributes = new HashMap<String,String>();
Map<String, Genotype> genotypes = new HashMap<String,Genotype>();
alleles.add(Allele.create("A",true));
alleles.add(Allele.create("CC",false));
alleles.add(Allele.create("-",false));
attributes.put("DP","50");
for (String name : header.getGenotypeSamples()) {
List<VCFGenotypeEncoding> myAlleles = new ArrayList<VCFGenotypeEncoding>();
myAlleles.add(new VCFGenotypeEncoding("C"));
myAlleles.add(new VCFGenotypeEncoding("D1"));
VCFGenotypeRecord rec = new VCFGenotypeRecord(name, myAlleles, VCFGenotypeRecord.PHASE.PHASED);
rec.setField("bb", "0");
gt.add(rec);
Map<String, String> gtattributes = new HashMap<String,String>();
gtattributes.put("BB","1");
Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true);
genotypes.put(name,gt);
}
return new VCFRecord("A","chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
return new VariantContext("RANDOM",loc, alleles, genotypes, 0, filters, attributes);
}