another iteration of the VCFReader and VCFRecord, introducing the VCFWriter
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1429 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1e8b97b560
commit
63d90702d6
|
|
@ -1,15 +1,8 @@
|
||||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.LinkedHashSet;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -30,7 +23,7 @@ public class VCFHeader {
|
||||||
private Set<HEADER_FIELDS> mHeaderFields = new LinkedHashSet<HEADER_FIELDS>();
|
private Set<HEADER_FIELDS> mHeaderFields = new LinkedHashSet<HEADER_FIELDS>();
|
||||||
|
|
||||||
// the associated meta data
|
// the associated meta data
|
||||||
private final List<Pair<String, String>> mMetaData = new ArrayList<Pair<String, String>>();
|
private final Map<String, String> mMetaData = new HashMap<String, String>();
|
||||||
|
|
||||||
// the list of auxillary tags
|
// the list of auxillary tags
|
||||||
private final List<String> auxillaryTags = new ArrayList<String>();
|
private final List<String> auxillaryTags = new ArrayList<String>();
|
||||||
|
|
@ -41,62 +34,25 @@ public class VCFHeader {
|
||||||
// the header string indicator
|
// the header string indicator
|
||||||
public static final String HEADER_INDICATOR = "#";
|
public static final String HEADER_INDICATOR = "#";
|
||||||
|
|
||||||
/**
|
/** our log, which we use to capture anything from this class */
|
||||||
* our log, which we want to capture anything from this class
|
|
||||||
*/
|
|
||||||
private static Logger logger = Logger.getLogger(VCFHeader.class);
|
private static Logger logger = Logger.getLogger(VCFHeader.class);
|
||||||
|
|
||||||
// patterns we use for detecting meta data and header lines
|
|
||||||
private static Pattern pMeta = Pattern.compile("^" + METADATA_INDICATOR + "\\s*(\\S+)\\s*=\\s*(\\S+)\\s*$");
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCF header, given an array of strings that all start with at least the # character
|
* create a VCF header, given a list of meta data and auxillary tags
|
||||||
*
|
*
|
||||||
* @param headerStrings a list of header strings
|
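* @param headerFields the header fields to store in this header, kept in the iteration order of the given set
* @param metaData the meta data key/value pairs to associate with this header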
|
||||||
|
* @param additionalColumns the names of the extra (auxillary) columns, in order
|
||||||
*/
|
*/
|
||||||
public VCFHeader(List<String> headerStrings) {
|
public VCFHeader(Set<HEADER_FIELDS> headerFields, Map<String, String> metaData, List<String> additionalColumns) {
|
||||||
try {
|
for (HEADER_FIELDS field : headerFields) mHeaderFields.add(field);
|
||||||
Thread.sleep(5000);
|
for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key));
|
||||||
} catch (InterruptedException e) {
|
for (String col : additionalColumns) auxillaryTags.add(col);
|
||||||
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterate over all the passed in strings
|
|
||||||
for (String str : headerStrings) {
|
|
||||||
Matcher matcher = pMeta.matcher(str);
|
|
||||||
if (matcher.matches()) {
|
|
||||||
String metaKey = "";
|
|
||||||
String metaValue = "";
|
|
||||||
if (matcher.groupCount() < 1) continue;
|
|
||||||
if (matcher.groupCount() == 2) metaValue = matcher.group(2);
|
|
||||||
metaKey = matcher.group(1);
|
|
||||||
mMetaData.add(new Pair<String, String>(metaKey, metaValue));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterate over all the passed in strings
|
|
||||||
for (String str : headerStrings) {
|
|
||||||
if (str.startsWith("#") && !str.startsWith("##")) {
|
|
||||||
String[] strings = str.substring(1).split("\\s+");
|
|
||||||
for (String s : strings) {
|
|
||||||
if (mHeaderFields.contains(s)) throw new StingException("Header field duplication is not allowed");
|
|
||||||
try {
|
|
||||||
mHeaderFields.add(HEADER_FIELDS.valueOf(s));
|
|
||||||
} catch (IllegalArgumentException e) {
|
|
||||||
this.auxillaryTags.add(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (mHeaderFields.size() != HEADER_FIELDS.values().length) {
|
|
||||||
throw new StingException("The VCF header is missing " + (HEADER_FIELDS.values().length - mHeaderFields.size()) + " required fields");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the header fieldsm in order they're presented in the input file
|
* get the header fields in order they're presented in the input file
|
||||||
* @return
|
*
|
||||||
|
* @return a set of the header fields, in order
|
||||||
*/
|
*/
|
||||||
public Set<HEADER_FIELDS> getHeaderFields() {
|
public Set<HEADER_FIELDS> getHeaderFields() {
|
||||||
return mHeaderFields;
|
return mHeaderFields;
|
||||||
|
|
@ -104,15 +60,17 @@ public class VCFHeader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the meta data, associated with this header
|
* get the meta data, associated with this header
|
||||||
* @return
|
*
|
||||||
|
* @return a map of the meta data
|
||||||
*/
|
*/
|
||||||
public List<Pair<String, String>> getMetaData() {
|
public Map<String, String> getMetaData() {
|
||||||
return mMetaData;
|
return mMetaData;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the auxillary tags
|
* get the auxillary tags
|
||||||
* @return
|
*
|
||||||
|
* @return a list of the extra column names, in order
|
||||||
*/
|
*/
|
||||||
public List<String> getAuxillaryTags() {
|
public List<String> getAuxillaryTags() {
|
||||||
return auxillaryTags;
|
return auxillaryTags;
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,16 @@
|
||||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */
|
||||||
* The VCFReader class, which given a valid vcf file, parses out the header and VCF records
|
public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
||||||
*/
|
|
||||||
public class VCFReader implements Iterator<VCFRecord> {
|
|
||||||
|
|
||||||
// our VCF header
|
// our VCF header
|
||||||
private VCFHeader mHeader;
|
private VCFHeader mHeader;
|
||||||
|
|
@ -23,10 +21,13 @@ public class VCFReader implements Iterator<VCFRecord> {
|
||||||
// our next record
|
// our next record
|
||||||
private VCFRecord mNextRecord = null;
|
private VCFRecord mNextRecord = null;
|
||||||
|
|
||||||
|
// a pattern we use for detecting meta data and header lines
|
||||||
|
private static Pattern pMeta = Pattern.compile("^" + VCFHeader.METADATA_INDICATOR + "\\s*(\\S+)\\s*=\\s*(\\S+)\\s*$");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a VCF reader, given a VCF file
|
* Create a VCF reader, given a VCF file
|
||||||
*
|
*
|
||||||
* @param vcfFile
|
* @param vcfFile the vcf file to read
|
||||||
*/
|
*/
|
||||||
public VCFReader(File vcfFile) {
|
public VCFReader(File vcfFile) {
|
||||||
Charset utf8 = Charset.forName("UTF-8");
|
Charset utf8 = Charset.forName("UTF-8");
|
||||||
|
|
@ -47,7 +48,7 @@ public class VCFReader implements Iterator<VCFRecord> {
|
||||||
lines.add(line);
|
lines.add(line);
|
||||||
line = mReader.readLine();
|
line = mReader.readLine();
|
||||||
}
|
}
|
||||||
mHeader = new VCFHeader(lines);
|
mHeader = this.createHeader(lines);
|
||||||
mNextRecord = new VCFRecord(mHeader, line);
|
mNextRecord = new VCFRecord(mHeader, line);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new StingException("Failed to parse VCF File on line: " + line, e);
|
throw new StingException("Failed to parse VCF File on line: " + line, e);
|
||||||
|
|
@ -55,16 +56,14 @@ public class VCFReader implements Iterator<VCFRecord> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** @return true if we have another VCF record to return */
|
||||||
*
|
|
||||||
* @return true if we have another VCF record to return
|
|
||||||
*/
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return (mNextRecord != null);
|
return (mNextRecord != null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return the next available VCF record. Make sure to check availability with a call to hasNext!
|
* return the next available VCF record. Make sure to check availability with a call to hasNext!
|
||||||
|
*
|
||||||
* @return a VCFRecord, representing the next record in the file
|
* @return a VCFRecord, representing the next record in the file
|
||||||
*/
|
*/
|
||||||
public VCFRecord next() {
|
public VCFRecord next() {
|
||||||
|
|
@ -79,10 +78,74 @@ public class VCFReader implements Iterator<VCFRecord> {
|
||||||
return rec;
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Remove is not supported */
|
||||||
* Remove is not supported
|
|
||||||
*/
|
|
||||||
public void remove() {
|
public void remove() {
|
||||||
throw new UnsupportedOperationException("Unsupported operation");
|
throw new UnsupportedOperationException("Unsupported operation");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a VCF header, given an array of strings that all start with at least the # character. This function is
|
||||||
|
* package protected so that the VCFReader can access this function
|
||||||
|
*
|
||||||
|
* @param headerStrings a list of header strings
|
||||||
|
*/
|
||||||
|
protected VCFHeader createHeader(List<String> headerStrings) {
|
||||||
|
|
||||||
|
Map<String,String> metaData = new HashMap<String,String>();
|
||||||
|
Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||||
|
List<String> auxTags = new ArrayList<String>();
|
||||||
|
// iterate over all the passed in strings
|
||||||
|
for (String str : headerStrings) {
|
||||||
|
Matcher matcher = pMeta.matcher(str);
|
||||||
|
if (matcher.matches()) {
|
||||||
|
String metaKey = "";
|
||||||
|
String metaValue = "";
|
||||||
|
if (matcher.groupCount() < 1) continue;
|
||||||
|
if (matcher.groupCount() == 2) metaValue = matcher.group(2);
|
||||||
|
metaKey = matcher.group(1);
|
||||||
|
metaData.put(metaKey, metaValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterate over all the passed in strings
|
||||||
|
for (String str : headerStrings) {
|
||||||
|
if (str.startsWith("#") && !str.startsWith("##")) {
|
||||||
|
String[] strings = str.substring(1).split("\\s+");
|
||||||
|
for (String s : strings) {
|
||||||
|
if (headerFields.contains(s)) throw new StingException("Header field duplication is not allowed");
|
||||||
|
try {
|
||||||
|
headerFields.add(VCFHeader.HEADER_FIELDS.valueOf(s));
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
auxTags.add(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (headerFields.size() != VCFHeader.HEADER_FIELDS.values().length) {
|
||||||
|
throw new StingException("The VCF header is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size()) + " required fields");
|
||||||
|
}
|
||||||
|
return new VCFHeader(headerFields,metaData,auxTags);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return get the header associated with this reader
|
||||||
|
*/
|
||||||
|
public VCFHeader getHeader() {
|
||||||
|
return this.mHeader;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<VCFRecord> iterator() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
mReader.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// we don't really care
|
||||||
|
Utils.warnUser("Unable to close VCF reader file, this is not fatal, but is worth noting");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the basic VCF record type
|
* the basic VCF record type
|
||||||
|
|
@ -17,12 +17,14 @@ public class VCFRecord {
|
||||||
private Map<String, String> mAuxValues = new HashMap<String, String>();
|
private Map<String, String> mAuxValues = new HashMap<String, String>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCFRecord, given a VCF header and the the values in this field
|
* create a VCFRecord, given a VCF header and the values in this field. This is protected, so that the reader is
|
||||||
|
* the only accessing object
|
||||||
|
* TODO: this seems like a bad design
|
||||||
*
|
*
|
||||||
* @param header the VCF header
|
* @param header the VCF header
|
||||||
* @param line the line to parse into individual fields
|
* @param line the line to parse into individual fields
|
||||||
*/
|
*/
|
||||||
public VCFRecord(VCFHeader header, String line) {
|
protected VCFRecord(VCFHeader header, String line) {
|
||||||
String tokens[] = line.split("\\s+");
|
String tokens[] = line.split("\\s+");
|
||||||
if (tokens.length != (header.getAuxillaryTags().size() + header.getHeaderFields().size())) {
|
if (tokens.length != (header.getAuxillaryTags().size() + header.getHeaderFields().size())) {
|
||||||
throw new StingException("Line:" + line + " didn't parse into " + (header.getAuxillaryTags().size() + header.getHeaderFields().size()) + " fields");
|
throw new StingException("Line:" + line + " didn't parse into " + (header.getAuxillaryTags().size() + header.getHeaderFields().size()) + " fields");
|
||||||
|
|
@ -39,6 +41,22 @@ public class VCFRecord {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * create a VCFRecord, given a VCF header and a list of column values. The values are consumed in order: first
 * one value for each header field (in the header's field order), then one value for each auxillary tag.
 *
 * @param header the VCF header
 * @param values the column values; must contain exactly one entry per header field and auxillary tag
 * @throws StingException if the number of values doesn't match the number of columns in the header
 */
public VCFRecord(VCFHeader header, List<String> values) {
    int expected = header.getAuxillaryTags().size() + header.getHeaderFields().size();
    if (values.size() != expected) {
        // report both counts: the mismatch can be too many values as well as too few
        throw new StingException("The input list doesn't contain the right number of fields, it has " + values.size() + " but should have " + expected + " fields");
    }
    int index = 0;
    for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
        mValues.put(field, values.get(index++));
    }
    for (String tag : header.getAuxillaryTags()) {
        mAuxValues.put(tag, values.get(index++));
    }
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* lookup a value, given it's column name
|
* lookup a value, given it's column name
|
||||||
*
|
*
|
||||||
|
|
@ -150,4 +168,12 @@ public class VCFRecord {
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the number of columnsof data we're storing
|
||||||
|
*/
|
||||||
|
public int getColumnCount() {
|
||||||
|
return this.mAuxValues.size() + this.mValues.size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
|
||||||
|
/** this class writers VCF files */
|
||||||
|
public class VCFWriter {
|
||||||
|
|
||||||
|
// the VCF header we're storing
|
||||||
|
private VCFHeader mHeader;
|
||||||
|
|
||||||
|
// the print stream we're writting to
|
||||||
|
BufferedWriter mWriter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a VCF writer, given a VCF header and a file to write to
|
||||||
|
*
|
||||||
|
* @param header the VCF header
|
||||||
|
* @param location the file location to write to
|
||||||
|
*/
|
||||||
|
public VCFWriter(VCFHeader header, File location) {
|
||||||
|
this.mHeader = header;
|
||||||
|
Charset utf8 = Charset.forName("UTF-8");
|
||||||
|
try {
|
||||||
|
mWriter = new BufferedWriter(
|
||||||
|
new OutputStreamWriter(
|
||||||
|
new FileOutputStream(location),
|
||||||
|
utf8));
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new StingException("Unable to create VCF file: " + location, e);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
|
||||||
|
// write the header meta-data out
|
||||||
|
for (String metadata : header.getMetaData().keySet()) {
|
||||||
|
mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "=" + header.getMetaData().get(metadata) + "\n");
|
||||||
|
}
|
||||||
|
// write out the column line
|
||||||
|
StringBuilder b = new StringBuilder();
|
||||||
|
b.append(VCFHeader.HEADER_INDICATOR);
|
||||||
|
for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) b.append(field + "\t");
|
||||||
|
for (String field : header.getAuxillaryTags()) b.append(field + "\t");
|
||||||
|
mWriter.write(b.toString() + "\n");
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new StingException("IOException writing the VCF header", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* output a record to the VCF file
|
||||||
|
* @param record the record to output
|
||||||
|
*/
|
||||||
|
public void addRecord(VCFRecord record) {
|
||||||
|
if (record.getColumnCount() != mHeader.getAuxillaryTags().size() + mHeader.getHeaderFields().size()) {
|
||||||
|
throw new StingException("Record has " + record.getColumnCount() +
|
||||||
|
" columns, when is should have " + (mHeader.getAuxillaryTags().size() +
|
||||||
|
mHeader.getHeaderFields().size()));
|
||||||
|
}
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
// first output the required fields in order
|
||||||
|
boolean first = true;
|
||||||
|
for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields()) {
|
||||||
|
if (first) { first = false; builder.append(record.getValue(field)); }
|
||||||
|
else builder.append("\t" + record.getValue(field));
|
||||||
|
}
|
||||||
|
for (String auxTag : mHeader.getAuxillaryTags()) {
|
||||||
|
builder.append("\t" + record.getValue(auxTag));
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
mWriter.write(builder.toString() + "\n");
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new StingException("Unable to write the VCF object to a file");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* attempt to close the VCF file
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
try {
|
||||||
|
mWriter.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new StingException("Unable to close VCFFile");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1,13 +1,10 @@
|
||||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.*;
|
import java.util.*;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -20,24 +17,45 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class VCFHeaderTest extends BaseTest {
|
public class VCFHeaderTest extends BaseTest {
|
||||||
|
|
||||||
|
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||||
|
private Map<String, String> metaData = new HashMap();
|
||||||
|
private List<String> additionalColumns = new ArrayList<String>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* give it fake data, and make sure we get back the right fake data
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void test1() {
|
public void testHeaderConstructor() {
|
||||||
File in = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
|
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||||
if (!in.exists()) throw new StingException("vfc doesn't exist");
|
headerFields.add(field);
|
||||||
List<String> array = new ArrayList<String>();
|
|
||||||
try {
|
|
||||||
BufferedReader reader = new BufferedReader(new FileReader("vcfexample.vcf"));
|
|
||||||
String line = reader.readLine();
|
|
||||||
while (line.startsWith("#")) {
|
|
||||||
array.add(line);
|
|
||||||
line = reader.readLine();
|
|
||||||
}
|
}
|
||||||
VCFHeader header = new VCFHeader(array);
|
metaData.put("one","1");
|
||||||
} catch (FileNotFoundException e) {
|
metaData.put("two","2");
|
||||||
Assert.fail("File not found exception in VCFHeaderTest");
|
additionalColumns.add("extra1");
|
||||||
} catch (IOException e) {
|
additionalColumns.add("extra2");
|
||||||
Assert.fail("IO exception in VCFHeaderTest");
|
// this should create a header that is valid
|
||||||
|
|
||||||
|
VCFHeader header = new VCFHeader(headerFields, metaData, additionalColumns);
|
||||||
|
|
||||||
|
// check the fields
|
||||||
|
int index = 0;
|
||||||
|
for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
|
||||||
|
Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index],field);
|
||||||
|
index++;
|
||||||
}
|
}
|
||||||
|
index = 0;
|
||||||
|
for (String key: header.getMetaData().keySet()) {
|
||||||
|
Assert.assertEquals(header.getMetaData().get(key),metaData.get(key));
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(metaData.size(),index);
|
||||||
|
index = 0;
|
||||||
|
for (String key: header.getAuxillaryTags()) {
|
||||||
|
Assert.assertTrue(additionalColumns.contains(key));
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(additionalColumns.size(),index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,96 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Class VCFWriterTest
|
||||||
|
* <p/>
|
||||||
|
* This class tests out the ability of the VCF writer to correctly write VCF files
|
||||||
|
*/
|
||||||
|
public class VCFWriterTest extends BaseTest {
|
||||||
|
private Set<VCFHeader.HEADER_FIELDS> headerFields = new LinkedHashSet<VCFHeader.HEADER_FIELDS>();
|
||||||
|
private Map<String, String> metaData = new HashMap();
|
||||||
|
private List<String> additionalColumns = new ArrayList<String>();
|
||||||
|
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
|
||||||
|
|
||||||
|
/** test, using the writer and reader, that we can output and input a VCF file without problems */
|
||||||
|
@Test
|
||||||
|
public void testBasicWriteAndRead() {
|
||||||
|
VCFHeader header = createFakeHeader();
|
||||||
|
VCFWriter writer = new VCFWriter(header,fakeVCFFile);
|
||||||
|
writer.addRecord(createVCFRecord(header));
|
||||||
|
writer.addRecord(createVCFRecord(header));
|
||||||
|
writer.close();
|
||||||
|
VCFReader reader = new VCFReader(fakeVCFFile);
|
||||||
|
int counter = 0;
|
||||||
|
// validate what we're reading in
|
||||||
|
validateHeader(reader.getHeader());
|
||||||
|
for(VCFRecord rec :reader) {
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(2,counter);
|
||||||
|
reader.close();
|
||||||
|
fakeVCFFile.delete();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a fake header of known quantity
|
||||||
|
* @return a fake VCF header
|
||||||
|
*/
|
||||||
|
private VCFHeader createFakeHeader() {
|
||||||
|
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||||
|
headerFields.add(field);
|
||||||
|
}
|
||||||
|
metaData.put("one", "1");
|
||||||
|
metaData.put("two", "2");
|
||||||
|
additionalColumns.add("extra1");
|
||||||
|
additionalColumns.add("extra2");
|
||||||
|
// this should create a header that is valid
|
||||||
|
|
||||||
|
return new VCFHeader(headerFields, metaData, additionalColumns);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VCFRecord createVCFRecord(VCFHeader header) {
|
||||||
|
int totalVals = header.getHeaderFields().size() + header.getAuxillaryTags().size();
|
||||||
|
List<String> array = new ArrayList<String>();
|
||||||
|
for (int x = 0; x < totalVals; x++) {
|
||||||
|
array.add(String.valueOf(x));
|
||||||
|
}
|
||||||
|
return new VCFRecord(header,array);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* validate a VCF header
|
||||||
|
* @param header the header to validate
|
||||||
|
*/
|
||||||
|
public void validateHeader(VCFHeader header) {
|
||||||
|
// check the fields
|
||||||
|
int index = 0;
|
||||||
|
for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
|
||||||
|
Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field);
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
index = 0;
|
||||||
|
for (String key : header.getMetaData().keySet()) {
|
||||||
|
Assert.assertEquals(header.getMetaData().get(key), metaData.get(key));
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(metaData.size(), index);
|
||||||
|
index = 0;
|
||||||
|
for (String key : header.getAuxillaryTags()) {
|
||||||
|
Assert.assertTrue(additionalColumns.contains(key));
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
Assert.assertEquals(additionalColumns.size(), index);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue