diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java
new file mode 100644
index 000000000..1052998d7
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java
@@ -0,0 +1,136 @@
+package org.broadinstitute.sting.utils.genotype.vcf;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+/**
+ * @author aaron
+ *
+ * Class VCFGenotypeRecord
+ *
+ * The genotype records in VCF store a considerable amount of information,
+ * so they were broken off into their own class
+ */
+public class VCFGenotypeRecord {
+ // what kind of phasing this genotype has
+ enum GT_GENOTYPE {
+ UNPHASED, PHASED, PHASED_SWITCH_PROB
+ }
+
+ // our phasing
+ private GT_GENOTYPE phaseType;
+
+ // our reference base
+ private final char reference;
+
+ // our allele base(s)
+ private final List bases = new ArrayList();
+
+ // our mapping of the format fields to values
+ private final Map fields = new HashMap();
+
+ // our pattern matching for the genotype fields
+ private static final Pattern basicSplit = Pattern.compile("([0-9]*)([\\\\|\\/])([0-9]*):(\\S*)");
+
+    /**
+     * generate a VCF genotype record, given its format string, the genotype string, and allele info
+     *
+     * @param formatString   the format string for this record, which contains the keys for the genotype parameters
+     * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
+     * @param altAlleles     the alternate allele string array, which we index into based on the field parameters
+     * @param referenceBase  the reference base
+     * @throws IllegalArgumentException if the format string doesn't start with GT, the genotype string doesn't match
+     *                                  the expected pattern, or it carries more values than the format declares
+     */
+    protected VCFGenotypeRecord(String formatString, String genotypeString, String altAlleles[], char referenceBase) {
+        reference = referenceBase;
+        // check that the first format field is GT, which is required; split() can hand back an empty array (e.g. ":")
+        String keys[] = formatString.split(":");
+        if (keys.length < 1 || !keys[0].equals("GT"))
+            throw new IllegalArgumentException("The format string must have fields, and the first must be GT (genotype)");
+
+        // break the genotype into: allele number, phase separator, allele number, and the remaining values
+        Matcher match = basicSplit.matcher(genotypeString);
+        if (!match.matches())
+            throw new IllegalArgumentException("Unable to match genotype string to expected regex");
+
+        // add the first allele (which can be ref by specifying 0)
+        addAllele(match.group(1), altAlleles, referenceBase);
+
+        determinePhase(match.group(2));
+
+        // do we have a second allele?
+        if (match.group(3).length() > 0) {
+            addAllele(match.group(3), altAlleles, referenceBase);
+        }
+
+        // everything after the first colon holds the values for the keys that follow GT
+        String remainder = match.group(4);
+        if (remainder.length() > 0 || keys.length > 1) {
+            // a negative limit keeps trailing empty strings, so blank values (e.g. "::" or "2:3:") are not dropped
+            String tokens[] = remainder.split(":", -1);
+            if (tokens.length > keys.length - 1)
+                throw new IllegalArgumentException("The genotype string has " + tokens.length +
+                        " values, but the format string only declares " + (keys.length - 1) + " keys after GT");
+            // keys[0] is GT, so the i-th value belongs to keys[i + 1]
+            for (int i = 0; i < tokens.length; i++) {
+                this.fields.put(keys[i + 1], tokens[i]);
+            }
+        }
+
+    }
+
+    /**
+     * add an allele to the list of alleles we have; allele number 0 selects the reference base
+     * @param alleleNumber  the allele number, as a string
+     * @param altAlleles    the list of alternate alleles
+     * @param referenceBase the reference base
+     * @throws IllegalArgumentException if the number isn't an integer, or has no matching alternate allele
+     */
+    private void addAllele(String alleleNumber, String[] altAlleles, char referenceBase) {
+        int number = Integer.parseInt(alleleNumber); // parsed once; NumberFormatException is an IllegalArgumentException
+        if (number < 0 || (number > 0 && (altAlleles == null || number > altAlleles.length)))
+            throw new IllegalArgumentException("Allele number " + alleleNumber + " has no matching alternate allele");
+        bases.add(number == 0 ? String.valueOf(referenceBase) : altAlleles[number - 1]);
+    }
+
+    /**
+     * work out the phasing of this genotype from the separator found between its allele numbers
+     *
+     * @param phase the separator string: a slash, a pipe, or a backslash
+     */
+    private void determinePhase(String phase) {
+        // classify the separator first, then assign the matching phasing type in one place
+        final boolean unphased = phase.equals("/");
+        final boolean phased = !unphased && phase.equals("|");
+        final boolean switchProb = !unphased && !phased && phase.equals("\\");
+        if (!unphased && !phased && !switchProb)
+            throw new IllegalArgumentException("Unknown genotype phasing parameter");
+        phaseType = unphased ? GT_GENOTYPE.UNPHASED
+                : phased ? GT_GENOTYPE.PHASED
+                : GT_GENOTYPE.PHASED_SWITCH_PROB;
+    }
+
+ /** getter methods */
+ /** @return the phasing type that was determined from the separator in the genotype string */
+ public GT_GENOTYPE getPhaseType() {
+ return phaseType;
+ }
+ /** @return the reference base that was passed to the constructor */
+ public char getReference() {
+ return reference;
+ }
+ /** @return the allele bases, in the order they appeared in the genotype string; this is the internal list, not a copy */
+ public List getAllele() {
+ return bases;
+ }
+ /** @return the internal map (not a copy) from the format keys that follow GT to this genotype's values */
+ public Map getFields() {
+ return fields;
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java
index 777ec5b19..63266ef23 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java
@@ -1,8 +1,11 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.StingException;
import java.util.*;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
/**
@@ -26,7 +29,7 @@ public class VCFHeader {
private final Map mMetaData = new HashMap();
// the list of auxillary tags
- private final List auxillaryTags = new ArrayList();
+ private final List mGenotypeSampleNames = new ArrayList();
// the character string that indicates meta data
public static final String METADATA_INDICATOR = "##";
@@ -34,19 +37,60 @@ public class VCFHeader {
// the header string indicator
public static final String HEADER_INDICATOR = "#";
- /** our log, which we use to capture anything from this class */
+ /**
+ * our log, which we use to capture anything from this class
+ */
private static Logger logger = Logger.getLogger(VCFHeader.class);
+ /**
+ * do we have genotyping data?
+ */
+ private boolean hasGenotypingData = false;
+
+ /**
+ * the current vcf version we support.
+ */
+ private static final String VCF_VERSION = "VCFv3.2";
+
/**
* create a VCF header, given a list of meta data and auxillary tags
*
- * @param metaData
- * @param additionalColumns
+ * @param headerFields the required header fields, in order they're presented
+ * @param metaData the meta data associated with this header
*/
- public VCFHeader(Set headerFields, Map metaData, List additionalColumns) {
+ protected VCFHeader(Set headerFields, Map metaData) {
for (HEADER_FIELDS field : headerFields) mHeaderFields.add(field);
for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key));
- for (String col : additionalColumns) auxillaryTags.add(col);
+ checkVCFVersion();
+ }
+
+    /**
+     * build a VCF header that carries genotype columns in addition to the required header fields
+     *
+     * @param headerFields        the required header fields, in order they're presented
+     * @param metaData            the meta data associated with this header
+     * @param genotypeSampleNames the genotype format field, and the sample names
+     */
+    protected VCFHeader(Set headerFields, Map metaData, List genotypeSampleNames) {
+        mHeaderFields.addAll(headerFields);
+        mMetaData.putAll(metaData);
+        mGenotypeSampleNames.addAll(genotypeSampleNames);
+        hasGenotypingData = true;
+        checkVCFVersion();
+    }
+
+    /**
+     * check our metadata for a VCF version tag, and throw an exception if the version is out of date
+     * or the version is not present
+     * @throws StingException if the "format" tag is missing, or its value isn't the supported version
+     */
+    public void checkVCFVersion() {
+        if (!mMetaData.containsKey("format"))
+            throw new StingException("VCFHeader: VCF version isn't present");
+        // compare from the constant's side, so a null value reports a mismatch instead of a NullPointerException
+        if (!VCF_VERSION.equals(mMetaData.get("format")))
+            throw new StingException("VCFHeader: VCF version of " + mMetaData.get("format") +
+                    " doesn't match the supported version of " + VCF_VERSION);
}
/**
@@ -68,12 +112,28 @@ public class VCFHeader {
}
/**
- * get the auxillary tags
+ * get the genotyping sample names
*
- * @return a list of the extra column names, in order
+ * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
*/
- public List getAuxillaryTags() {
- return auxillaryTags;
+ public List getGenotypeSamples() {
+ return mGenotypeSampleNames;
+ }
+
+ /**
+ * do we have genotyping data?
+ *
+ * @return true if we have genotyping columns, false otherwise
+ */
+ public boolean hasGenotypingData() {
+ return hasGenotypingData;
+ }
+
+ /**
+ * @return the column count: the header fields, plus the format column and one column per sample when we have genotyping data
+ */
+ public int getColumnCount() {
+ return mHeaderFields.size() + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
}
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java
index 5ca28ca3e..0660e1d13 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java
@@ -37,7 +37,7 @@ public class VCFReader implements Iterator, Iterable {
new FileInputStream(vcfFile),
utf8));
} catch (FileNotFoundException e) {
- throw new StingException("Unable to find VCF file: " + vcfFile, e);
+ throw new StingException("VCFReader: Unable to find VCF file: " + vcfFile, e);
}
String line = null;
@@ -51,7 +51,7 @@ public class VCFReader implements Iterator, Iterable {
mHeader = this.createHeader(lines);
mNextRecord = new VCFRecord(mHeader, line);
} catch (IOException e) {
- throw new StingException("Failed to parse VCF File on line: " + line, e);
+ throw new StingException("VCFReader: Failed to parse VCF File on line: " + line, e);
}
}
@@ -112,17 +112,19 @@ public class VCFReader implements Iterator, Iterable {
if (str.startsWith("#") && !str.startsWith("##")) {
String[] strings = str.substring(1).split("\\s+");
for (String s : strings) {
- if (headerFields.contains(s)) throw new StingException("Header field duplication is not allowed");
+ if (headerFields.contains(s)) throw new StingException("VCFReader: Header field duplication is not allowed");
try {
headerFields.add(VCFHeader.HEADER_FIELDS.valueOf(s));
} catch (IllegalArgumentException e) {
+ if (!s.equals("FORMAT"))
auxTags.add(s);
}
}
}
}
if (headerFields.size() != VCFHeader.HEADER_FIELDS.values().length) {
- throw new StingException("The VCF header is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size()) + " required fields");
+ throw new StingException("VCFReader: The VCF column header line is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size())
+ + " of the " + VCFHeader.HEADER_FIELDS.values().length + " required fields");
}
return new VCFHeader(headerFields,metaData,auxTags);
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
index 20fcd5bf3..e92089215 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
@@ -2,73 +2,84 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.StingException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-/**
- * the basic VCF record type
- */
+/** the basic VCF record type */
public class VCFRecord {
// required field values
- private Map mValues = new HashMap();
+ private final Map mValues = new HashMap();
- // our auxillary values
- private Map mAuxValues = new HashMap();
+ // our genotype sample fields
+ private final Map mGenotypeFields = new HashMap();
+
+ // the format String, which specifies what each genotype can contain for values
+ private String formatString;
/**
* create a VCFRecord, given a VCF header and the the values in this field. THis is protected, so that the reader is
* the only accessing object
- * TODO: this seems like a bad design
*
* @param header the VCF header
* @param line the line to parse into individual fields
*/
protected VCFRecord(VCFHeader header, String line) {
String tokens[] = line.split("\\s+");
- if (tokens.length != (header.getAuxillaryTags().size() + header.getHeaderFields().size())) {
- throw new StingException("Line:" + line + " didn't parse into " + (header.getAuxillaryTags().size() + header.getHeaderFields().size()) + " fields");
- }
-
- int tokenCount = 0;
- for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
- mValues.put(field, tokens[tokenCount]);
- tokenCount++;
- }
- for (String aux : header.getAuxillaryTags()) {
- mAuxValues.put(aux, tokens[tokenCount]);
- tokenCount++;
- }
+ List values = new ArrayList();
+ for (String str : tokens) values.add(str);
+ initialize(header, values);
}
+ /**
+ * given a VCF header, and the values for each of the columns, create a VCF record
+ *
+ * @param header the VCF header
+ * @param values the values, as a list, for each of the columns
+ */
public VCFRecord(VCFHeader header, List values) {
- if (values.size() != (header.getAuxillaryTags().size() + header.getHeaderFields().size())) {
- throw new StingException("The input list doesn't contain enough fields, it should have " + (header.getAuxillaryTags().size() + header.getHeaderFields().size()) + " fields");
+ initialize(header, values);
+ }
+
+ /**
+ * create the VCFRecord
+ *
+ * @param header the VCF header
+ * @param values the list of strings that make up the columns of the record
+ */
+ private void initialize(VCFHeader header, List values) {
+ if (values.size() != header.getColumnCount()) {
+ throw new StingException("The input list doesn't contain enough fields, it should have " + header.getColumnCount() + " fields");
}
int index = 0;
- for (VCFHeader.HEADER_FIELDS field: header.getHeaderFields()) {
- mValues.put(field,values.get(index));
+ for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
+ mValues.put(field, values.get(index));
index++;
}
- for (String str: header.getAuxillaryTags()) {
- mAuxValues.put(str,values.get(index));
+ if (header.hasGenotypingData()) {
+ formatString = values.get(index);
index++;
+ for (String str : header.getGenotypeSamples()) {
+ mGenotypeFields.put(str, values.get(index));
+ index++;
+ }
}
}
-
/**
* lookup a value, given it's column name
*
* @param key the column name, which is looked up in both the set columns and the auxillary columns
+ *
* @return a String representing the column values, or null if the field doesn't exist in this record
*/
public String getValue(String key) {
try {
return mValues.get(VCFHeader.HEADER_FIELDS.valueOf(key));
} catch (IllegalArgumentException e) {
- if (this.mAuxValues.containsKey(key)) {
- return mAuxValues.get(key);
+ if (this.mGenotypeFields.containsKey(key)) {
+ return mGenotypeFields.get(key);
}
return null;
}
@@ -77,30 +88,25 @@ public class VCFRecord {
/**
* get a required field, given the field tag
*
- * @param field
- * @return
+ * @param field the key for the field
+ *
+ * @return the field value
*/
public String getValue(VCFHeader.HEADER_FIELDS field) {
return mValues.get(field);
}
- /**
- * @return the string for the chromosome that this VCF record is associated with
- */
+ /** @return the string for the chromosome that this VCF record is associated with */
public String getChromosome() {
return this.mValues.get(VCFHeader.HEADER_FIELDS.CHROM);
}
- /**
- * @return this VCF records position on the specified chromosome
- */
+ /** @return this VCF records position on the specified chromosome */
public long getPosition() {
return Long.valueOf(this.mValues.get(VCFHeader.HEADER_FIELDS.POS));
}
- /**
- * @return the ID value for this record
- */
+ /** @return the ID value for this record */
public String getID() {
return this.mValues.get(VCFHeader.HEADER_FIELDS.ID);
}
@@ -131,9 +137,7 @@ public class VCFRecord {
return getAlternateAlleles() != null;
}
- /**
- * @return the phred-scaled quality score
- */
+ /** @return the phred-scaled quality score */
public int getQual() {
return Integer.valueOf(this.mValues.get(VCFHeader.HEADER_FIELDS.QUAL));
}
@@ -156,24 +160,37 @@ public class VCFRecord {
/**
* get the information key-value pairs as a Map<>
+ *
* @return a map, of the info key-value pairs
*/
- public Map getInfoValues() {
- Map ret = new HashMap();
+ public Map getInfoValues() {
+ Map ret = new HashMap();
String infoSplit[] = mValues.get(VCFHeader.HEADER_FIELDS.INFO).split(";");
- for (String s: infoSplit) {
+ for (String s : infoSplit) {
String keyValue[] = s.split("=");
- if (keyValue.length != 2) throw new StingException("Key value pairs must have both a key and a value; pair: " + s);
- ret.put(keyValue[0],keyValue[1]);
+ if (keyValue.length != 2)
+ throw new StingException("Key value pairs must have both a key and a value; pair: " + s);
+ ret.put(keyValue[0], keyValue[1]);
}
return ret;
}
- /**
- *
- * @return the number of columnsof data we're storing
- */
+ /** @return the number of columns of data we're storing */
public int getColumnCount() {
- return this.mAuxValues.size() + this.mValues.size();
+ return this.mGenotypeFields.size() + this.mValues.size();
}
+
+    /**
+     * build the genotype record for one sample from this record's format string and that sample's column
+     * @param sampleName the sample name to get the genotyping tags for
+     * @return a VCFGenotypeRecord
+     * @throws IllegalArgumentException if this record has no column for the given sample name
+     */
+    public VCFGenotypeRecord getVCFGenotypeRecord(String sampleName) {
+        if (this.mGenotypeFields.containsKey(sampleName)) {
+            return new VCFGenotypeRecord(formatString, mGenotypeFields.get(sampleName), this.getAlternateAlleles(), this.getReferenceBase());
+        }
+        throw new IllegalArgumentException("Sample Name: " + sampleName + " doesn't exist in this VCF record");
+    }
+
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java
new file mode 100644
index 000000000..b37bb3929
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java
@@ -0,0 +1,57 @@
+package org.broadinstitute.sting.utils.genotype.vcf;
+
+
+import java.io.File;
+
+
+/**
+ * @author aaron
+ *
+ * Class VCFValidator
+ *
+ * validate a VCF file
+ */
+public class VCFValidator {
+    private static final String VCF_VERSION = "VCFv3.2";
+    /**
+     * about as simple as things come right now.  We open the file, process all the entries in the file,
+     * and if no errors pop up in processing, well hey, looks good to us.
+     * TODO: add validation to individual records fields as they make sense
+     *
+     * @param args the vcf file is the only parameter
+     */
+    public static void main(String[] args) {
+        if (args.length != 1) {
+            printUsage();
+            return;
+        }
+        File vcfFile = new File(args[0]);
+        if (!vcfFile.exists()) {
+            System.err.println("Specified VCF file doesn't exist, please check the input file\n");
+            printUsage();
+            return;
+        }
+        int counter = 0;
+        VCFReader reader = null; // declared outside the try so the finally block can release it
+        try {
+            reader = new VCFReader(vcfFile);
+            while (reader.hasNext()) {
+                counter++;
+                reader.next();
+            }
+        } catch (Exception e) {
+            System.err.println("VCF Validation failed, after parsing " + counter + " entries.");
+            System.err.println("The reason given was: " + e.getMessage());
+        } finally {
+            try { if (reader != null) reader.close(); } catch (Exception ignored) { /* best effort: validation is already reported */ }
+        }
+        System.err.println("Viewed " + counter + " VCF record entries.");
+    }
+    /** print the validator's name, supported VCF version, and command line usage to standard error */
+    public static void printUsage() {
+        System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")");
+        System.err.println("Usage:");
+        System.err.println("vcfvalidator <vcf file>");
+        System.err.println("");
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
index c61444f64..c848ece17 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
@@ -41,7 +41,7 @@ public class VCFWriter {
StringBuilder b = new StringBuilder();
b.append(VCFHeader.HEADER_INDICATOR);
for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) b.append(field + "\t");
- for (String field : header.getAuxillaryTags()) b.append(field + "\t");
+ for (String field : header.getGenotypeSamples()) b.append(field + "\t");
mWriter.write(b.toString() + "\n");
}
catch (IOException e) {
@@ -54,10 +54,9 @@ public class VCFWriter {
* @param record the record to output
*/
public void addRecord(VCFRecord record) {
- if (record.getColumnCount() != mHeader.getAuxillaryTags().size() + mHeader.getHeaderFields().size()) {
+ if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) {
throw new StingException("Record has " + record.getColumnCount() +
- " columns, when is should have " + (mHeader.getAuxillaryTags().size() +
- mHeader.getHeaderFields().size()));
+ " columns, when is should have " + mHeader.getColumnCount());
}
StringBuilder builder = new StringBuilder();
// first output the required fields in order
@@ -66,7 +65,7 @@ public class VCFWriter {
if (first) { first = false; builder.append(record.getValue(field)); }
else builder.append("\t" + record.getValue(field));
}
- for (String auxTag : mHeader.getAuxillaryTags()) {
+ for (String auxTag : mHeader.getGenotypeSamples()) {
builder.append("\t" + record.getValue(auxTag));
}
try {
diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java
deleted file mode 100644
index fd7f5f9f8..000000000
--- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package org.broadinstitute.sting.utils.sam;
-
-
-/**
- *
- * @author aaron
- *
- * Class ArtificialSAMGenerator
- *
- * This provides for an external utility, that creates sam files and associates fasta files
- */
-public class ArtificialSAMGenerator {
-}
-
-
-
-
-
-
-
-
-
-
-class ArtificialFASTAUtils {
-
-
-
-}
\ No newline at end of file
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java
new file mode 100644
index 000000000..1ffec2762
--- /dev/null
+++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java
@@ -0,0 +1,83 @@
+package org.broadinstitute.sting.utils.genotype.vcf;
+
+import org.broadinstitute.sting.BaseTest;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Map;
+
+
+/**
+ *
+ * @author aaron
+ *
+ * Class VCFGenotypeRecordTest
+ *
+ * Tests the parsing of genotype strings by VCFGenotypeRecord.
+ */
+public class VCFGenotypeRecordTest extends BaseTest {
+
+    /**
+     * a fully populated genotype string yields both alleles, the phasing, and every format value
+     */
+    @Test
+    public void testBasicParsing() {
+        final String format = "GT:B:C:D";
+        final String genotype = "0|1:2:3:4";
+        final String[] alternates = {"A","C","G","T"};
+        final char refBase = 'N';
+        VCFGenotypeRecord record = new VCFGenotypeRecord(format, genotype, alternates, refBase);
+        Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED, record.getPhaseType());
+        Assert.assertEquals(refBase, record.getReference());
+        Assert.assertEquals("N", record.getAllele().get(0));
+        Assert.assertEquals("A", record.getAllele().get(1));
+        Map parsed = record.getFields();
+        Assert.assertEquals(3, parsed.size());
+        Assert.assertTrue(parsed.get("B").equals("2"));
+        Assert.assertTrue(parsed.get("C").equals("3"));
+        Assert.assertTrue(parsed.get("D").equals("4"));
+    }
+
+
+    /**
+     * blank values in the middle of the genotype string come back as empty strings
+     */
+    @Test
+    public void testMissingFieldParsing() {
+        final String format = "GT:B:C:D";
+        final String genotype = "0|1:::4";
+        final String[] alternates = {"A","C","G","T"};
+        final char refBase = 'N';
+        VCFGenotypeRecord record = new VCFGenotypeRecord(format, genotype, alternates, refBase);
+        Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED, record.getPhaseType());
+        Assert.assertEquals(refBase, record.getReference());
+        Assert.assertEquals("N", record.getAllele().get(0));
+        Assert.assertEquals("A", record.getAllele().get(1));
+        Map parsed = record.getFields();
+        Assert.assertEquals(3, parsed.size());
+        Assert.assertTrue(parsed.get("B").equals(""));
+        Assert.assertTrue(parsed.get("C").equals(""));
+        Assert.assertTrue(parsed.get("D").equals("4"));
+    }
+
+    /**
+     * a genotype string whose values are all blank still maps every format key to an empty string
+     */
+    @Test
+    public void testMissingAllFields() {
+        final String format = "GT:B:C:D";
+        final String genotype = "0|1:::";
+        final String[] alternates = {"A","C","G","T"};
+        final char refBase = 'N';
+        VCFGenotypeRecord record = new VCFGenotypeRecord(format, genotype, alternates, refBase);
+        Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED, record.getPhaseType());
+        Assert.assertEquals(refBase, record.getReference());
+        Assert.assertEquals("N", record.getAllele().get(0));
+        Assert.assertEquals("A", record.getAllele().get(1));
+        Map parsed = record.getFields();
+        Assert.assertEquals(3, parsed.size());
+        Assert.assertTrue(parsed.get("B").equals(""));
+        Assert.assertTrue(parsed.get("C").equals(""));
+        Assert.assertTrue(parsed.get("D").equals(""));
+    }
+}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java
index 83a3231c8..b99a865d4 100644
--- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java
@@ -29,7 +29,7 @@ public class VCFHeaderTest extends BaseTest {
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
headerFields.add(field);
}
- metaData.put("one","1");
+ metaData.put("format","VCFv3.2");
metaData.put("two","2");
additionalColumns.add("extra1");
additionalColumns.add("extra2");
@@ -50,7 +50,7 @@ public class VCFHeaderTest extends BaseTest {
}
Assert.assertEquals(metaData.size(),index);
index = 0;
- for (String key: header.getAuxillaryTags()) {
+ for (String key: header.getGenotypeSamples()) {
Assert.assertTrue(additionalColumns.contains(key));
index++;
}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java
index 44f802f3f..acd3c6f78 100644
--- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java
@@ -20,8 +20,9 @@ public class VCFReaderTest extends BaseTest {
while (reader.hasNext()) {
counter++;
reader.next();
- System.err.println(counter);
}
Assert.assertEquals(5,counter);
}
+
+
}
diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java
index 123b632c7..417b4eae2 100644
--- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java
+++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java
@@ -39,7 +39,6 @@ public class VCFWriterTest extends BaseTest {
Assert.assertEquals(2,counter);
reader.close();
fakeVCFFile.delete();
-
}
/**
@@ -50,8 +49,9 @@ public class VCFWriterTest extends BaseTest {
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
headerFields.add(field);
}
- metaData.put("one", "1");
+ metaData.put("format", "VCFv3.2"); // required
metaData.put("two", "2");
+ additionalColumns.add("FORMAT");
additionalColumns.add("extra1");
additionalColumns.add("extra2");
// this should create a header that is valid
@@ -60,7 +60,7 @@ public class VCFWriterTest extends BaseTest {
}
private VCFRecord createVCFRecord(VCFHeader header) {
- int totalVals = header.getHeaderFields().size() + header.getAuxillaryTags().size();
+ int totalVals = header.getColumnCount();
List array = new ArrayList();
for (int x = 0; x < totalVals; x++) {
array.add(String.valueOf(x));
@@ -87,10 +87,10 @@ public class VCFWriterTest extends BaseTest {
}
Assert.assertEquals(metaData.size(), index);
index = 0;
- for (String key : header.getAuxillaryTags()) {
+ for (String key : header.getGenotypeSamples()) {
Assert.assertTrue(additionalColumns.contains(key));
index++;
}
- Assert.assertEquals(additionalColumns.size(), index);
+ Assert.assertEquals(additionalColumns.size(), index+1 /* for the header field we don't see */);
}
}