diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java new file mode 100644 index 000000000..1052998d7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -0,0 +1,136 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * @author aaron + *

+ * Class VCFGenotypeRecord + *

/**
 * A single sample's genotype entry from a VCF record.
 *
 * The genotype records in VCF store a considerable amount of information,
 * so they were broken off into their own class. A record is built from the
 * colon separated format string (whose first key must be GT) and the matching
 * colon separated genotype string, e.g. format "GT:B:C:D" and genotype "0|1:2:3:4".
 *
 * The GT value holds one or two allele numbers separated by a phasing character:
 * "/" (unphased), "|" (phased) or "\" (phased with switch probability). Allele
 * number 0 is the reference base; allele number n (n >= 1) is entry n - 1 of the
 * alternate allele array.
 */
public class VCFGenotypeRecord {
    // what kind of phasing this genotype has
    enum GT_GENOTYPE {
        UNPHASED, PHASED, PHASED_SWITCH_PROB
    }

    // our pattern matching for the genotype fields: allele, phasing character, optional second
    // allele, and then (optionally) a colon followed by the values of the remaining format keys
    private static final Pattern basicSplit = Pattern.compile("([0-9]*)([\\\\|\\/])([0-9]*)(?::(\\S*))?");

    // our phasing
    private final GT_GENOTYPE phaseType;

    // our reference bases(s)
    private final char reference;

    // our bases(s), in the order they appear in the GT value
    private final List<String> bases = new ArrayList<String>();

    // our mapping of the format fields (everything after GT) to values
    private final Map<String, String> fields = new HashMap<String, String>();

    /**
     * generate a VCF genotype record, given its format string, the genotype string, and allele info
     *
     * @param formatString   the format string for this record, which contains the keys for the genotype parameters
     * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
     * @param altAlleles     the alternate allele string array, which we index into based on the field parameters;
     *                       may be null when the genotype only refers to the reference allele (0)
     * @param referenceBase  the reference base
     *
     * @throws IllegalArgumentException if the format string doesn't start with GT, the genotype string can't be
     *                                  parsed, an allele number has no matching alternate allele, or more values
     *                                  are supplied than the format string has keys
     */
    protected VCFGenotypeRecord(String formatString, String genotypeString, String altAlleles[], char referenceBase) {
        reference = referenceBase;

        // check that the first format field is GT, which is required. A format string made up
        // only of colons splits into an empty array, so the length has to be checked first
        String keys[] = formatString.split(":");
        if (keys.length < 1 || !keys[0].equals("GT")) {
            throw new IllegalArgumentException("The format string must have fields, and the first must be GT (genotype)");
        }

        // split the genotype string into the GT pieces and the remaining values
        Matcher match = basicSplit.matcher(genotypeString);
        if (!match.matches()) {
            throw new IllegalArgumentException("Unable to match genotype string to expected regex");
        }

        // add the first allele (which can be ref by specifying 0)
        addAllele(match.group(1), altAlleles, referenceBase);

        phaseType = determinePhase(match.group(2));

        // do we have a second allele?
        if (match.group(3).length() > 0) {
            addAllele(match.group(3), altAlleles, referenceBase);
        }

        // group 4 is null when the genotype string holds nothing but the GT value
        addFields(keys, match.group(4));
    }

    /**
     * map every format key after GT to its value; keys whose value is blank or left off map to ""
     *
     * @param keys   the format keys, with GT at index 0
     * @param values the colon separated values following the GT value, or null if there were none
     */
    private void addFields(String[] keys, String values) {
        // a negative limit keeps trailing empty strings, which split() drops by default
        String[] tokens = (values == null || values.length() == 0) ? new String[0] : values.split(":", -1);
        int valueKeyCount = keys.length - 1;
        if (tokens.length > valueKeyCount) {
            throw new IllegalArgumentException("The genotype string supplies " + tokens.length
                    + " values, but the format string only has " + valueKeyCount + " keys after GT");
        }
        for (int index = 0; index < valueKeyCount; index++) {
            fields.put(keys[index + 1], (index < tokens.length) ? tokens[index] : "");
        }
    }

    /**
     * add an allele to the list of alleles we have
     *
     * @param alleleNumber  the allele number, as a string; 0 is the reference, n is alternate allele n - 1
     * @param altAlleles    the list of alternate alleles
     * @param referenceBase the reference base
     */
    private void addAllele(String alleleNumber, String[] altAlleles, char referenceBase) {
        int allele = Integer.parseInt(alleleNumber);
        if (allele == 0) {
            bases.add(String.valueOf(referenceBase));
            return;
        }
        int available = (altAlleles == null) ? 0 : altAlleles.length;
        if (allele > available) {
            throw new IllegalArgumentException("Allele number " + allele + " was specified, but only "
                    + available + " alternate allele(s) are available");
        }
        bases.add(altAlleles[allele - 1]);
    }

    /**
     * determine the phase of the genotype
     *
     * @param phase the string that contains the phase character
     *
     * @return the phasing type that the character stands for
     */
    private static GT_GENOTYPE determinePhase(String phase) {
        if (phase.equals("/")) {
            return GT_GENOTYPE.UNPHASED;
        }
        if (phase.equals("|")) {
            return GT_GENOTYPE.PHASED;
        }
        if (phase.equals("\\")) {
            return GT_GENOTYPE.PHASED_SWITCH_PROB;
        }
        throw new IllegalArgumentException("Unknown genotype phasing parameter");
    }

    /** getter methods */

    /** @return the phasing of this genotype */
    public GT_GENOTYPE getPhaseType() {
        return phaseType;
    }

    /** @return the reference base this record was created with */
    public char getReference() {
        return reference;
    }

    /** @return the allele strings, in the order they appeared in the GT value */
    public List<String> getAllele() {
        return bases;
    }

    /** @return the mapping of each format key after GT to its value ("" when the value was blank) */
    public Map<String, String> getFields() {
        return fields;
    }
}
+ */ + private static final String VCF_VERSION = "VCFv3.2"; + /** * create a VCF header, given a list of meta data and auxillary tags * - * @param metaData - * @param additionalColumns + * @param headerFields the required header fields, in order they're presented + * @param metaData the meta data associated with this header */ - public VCFHeader(Set headerFields, Map metaData, List additionalColumns) { + protected VCFHeader(Set headerFields, Map metaData) { for (HEADER_FIELDS field : headerFields) mHeaderFields.add(field); for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key)); - for (String col : additionalColumns) auxillaryTags.add(col); + checkVCFVersion(); + } + + /** + * create a VCF header, given a list of meta data and auxillary tags + * + * @param headerFields the required header fields, in order they're presented + * @param metaData the meta data associated with this header + * @param genotypeSampleNames the genotype format field, and the sample names + */ + protected VCFHeader(Set headerFields, Map metaData, List genotypeSampleNames) { + for (HEADER_FIELDS field : headerFields) mHeaderFields.add(field); + for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key)); + for (String col : genotypeSampleNames) mGenotypeSampleNames.add(col); + hasGenotypingData = true; + checkVCFVersion(); + } + + /** + * check our metadata for a VCF version tag, and throw an exception if the version is out of date + * or the version is not present + */ + public void checkVCFVersion() { + if (mMetaData.containsKey("format")) { + if (mMetaData.get("format").equals(VCF_VERSION)) + return; + throw new StingException("VCFHeader: VCF version of " + mMetaData.get("format") + + " doesn't match the supported version of " + VCF_VERSION); + } + throw new StingException("VCFHeader: VCF version isn't present"); } /** @@ -68,12 +112,28 @@ public class VCFHeader { } /** - * get the auxillary tags + * get the genotyping sample names * - * @return a list of 
the extra column names, in order + * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false */ - public List getAuxillaryTags() { - return auxillaryTags; + public List getGenotypeSamples() { + return mGenotypeSampleNames; + } + + /** + * do we have genotyping data? + * + * @return true if we have genotyping columns, false otherwise + */ + public boolean hasGenotypingData() { + return hasGenotypingData; + } + + /** + * @return the column count, + */ + public int getColumnCount() { + return mHeaderFields.size() + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 5ca28ca3e..0660e1d13 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -37,7 +37,7 @@ public class VCFReader implements Iterator, Iterable { new FileInputStream(vcfFile), utf8)); } catch (FileNotFoundException e) { - throw new StingException("Unable to find VCF file: " + vcfFile, e); + throw new StingException("VCFReader: Unable to find VCF file: " + vcfFile, e); } String line = null; @@ -51,7 +51,7 @@ public class VCFReader implements Iterator, Iterable { mHeader = this.createHeader(lines); mNextRecord = new VCFRecord(mHeader, line); } catch (IOException e) { - throw new StingException("Failed to parse VCF File on line: " + line, e); + throw new StingException("VCFReader: Failed to parse VCF File on line: " + line, e); } } @@ -112,17 +112,19 @@ public class VCFReader implements Iterator, Iterable { if (str.startsWith("#") && !str.startsWith("##")) { String[] strings = str.substring(1).split("\\s+"); for (String s : strings) { - if (headerFields.contains(s)) throw new StingException("Header field duplication is not allowed"); + if (headerFields.contains(s)) throw new 
StingException("VCFReader: Header field duplication is not allowed"); try { headerFields.add(VCFHeader.HEADER_FIELDS.valueOf(s)); } catch (IllegalArgumentException e) { + if (!s.equals("FORMAT")) auxTags.add(s); } } } } if (headerFields.size() != VCFHeader.HEADER_FIELDS.values().length) { - throw new StingException("The VCF header is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size()) + " required fields"); + throw new StingException("VCFReader: The VCF column header line is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size()) + + " of the " + VCFHeader.HEADER_FIELDS.values().length + " required fields"); } return new VCFHeader(headerFields,metaData,auxTags); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 20fcd5bf3..e92089215 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -2,73 +2,84 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broadinstitute.sting.utils.StingException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -/** - * the basic VCF record type - */ +/** the basic VCF record type */ public class VCFRecord { // required field values - private Map mValues = new HashMap(); + private final Map mValues = new HashMap(); - // our auxillary values - private Map mAuxValues = new HashMap(); + // our genotype sample fields + private final Map mGenotypeFields = new HashMap(); + + // the format String, which specifies what each genotype can contain for values + private String formatString; /** * create a VCFRecord, given a VCF header and the the values in this field. 
THis is protected, so that the reader is * the only accessing object - * TODO: this seems like a bad design * * @param header the VCF header * @param line the line to parse into individual fields */ protected VCFRecord(VCFHeader header, String line) { String tokens[] = line.split("\\s+"); - if (tokens.length != (header.getAuxillaryTags().size() + header.getHeaderFields().size())) { - throw new StingException("Line:" + line + " didn't parse into " + (header.getAuxillaryTags().size() + header.getHeaderFields().size()) + " fields"); - } - - int tokenCount = 0; - for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) { - mValues.put(field, tokens[tokenCount]); - tokenCount++; - } - for (String aux : header.getAuxillaryTags()) { - mAuxValues.put(aux, tokens[tokenCount]); - tokenCount++; - } + List values = new ArrayList(); + for (String str : tokens) values.add(str); + initialize(header, values); } + /** + * given a VCF header, and the values for each of the columns, create a VCF record + * + * @param header the VCF header + * @param values the values, as a list, for each of the columns + */ public VCFRecord(VCFHeader header, List values) { - if (values.size() != (header.getAuxillaryTags().size() + header.getHeaderFields().size())) { - throw new StingException("The input list doesn't contain enough fields, it should have " + (header.getAuxillaryTags().size() + header.getHeaderFields().size()) + " fields"); + initialize(header, values); + } + + /** + * create the VCFRecord + * + * @param header the VCF header + * @param values the list of strings that make up the columns of the record + */ + private void initialize(VCFHeader header, List values) { + if (values.size() != header.getColumnCount()) { + throw new StingException("The input list doesn't contain enough fields, it should have " + header.getColumnCount() + " fields"); } int index = 0; - for (VCFHeader.HEADER_FIELDS field: header.getHeaderFields()) { - mValues.put(field,values.get(index)); + for 
(VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) { + mValues.put(field, values.get(index)); index++; } - for (String str: header.getAuxillaryTags()) { - mAuxValues.put(str,values.get(index)); + if (header.hasGenotypingData()) { + formatString = values.get(index); index++; + for (String str : header.getGenotypeSamples()) { + mGenotypeFields.put(str, values.get(index)); + index++; + } } } - /** * lookup a value, given it's column name * * @param key the column name, which is looked up in both the set columns and the auxillary columns + * * @return a String representing the column values, or null if the field doesn't exist in this record */ public String getValue(String key) { try { return mValues.get(VCFHeader.HEADER_FIELDS.valueOf(key)); } catch (IllegalArgumentException e) { - if (this.mAuxValues.containsKey(key)) { - return mAuxValues.get(key); + if (this.mGenotypeFields.containsKey(key)) { + return mGenotypeFields.get(key); } return null; } @@ -77,30 +88,25 @@ public class VCFRecord { /** * get a required field, given the field tag * - * @param field - * @return + * @param field the key for the field + * + * @return the field value */ public String getValue(VCFHeader.HEADER_FIELDS field) { return mValues.get(field); } - /** - * @return the string for the chromosome that this VCF record is associated with - */ + /** @return the string for the chromosome that this VCF record is associated with */ public String getChromosome() { return this.mValues.get(VCFHeader.HEADER_FIELDS.CHROM); } - /** - * @return this VCF records position on the specified chromosome - */ + /** @return this VCF records position on the specified chromosome */ public long getPosition() { return Long.valueOf(this.mValues.get(VCFHeader.HEADER_FIELDS.POS)); } - /** - * @return the ID value for this record - */ + /** @return the ID value for this record */ public String getID() { return this.mValues.get(VCFHeader.HEADER_FIELDS.ID); } @@ -131,9 +137,7 @@ public class VCFRecord { return 
getAlternateAlleles() != null; } - /** - * @return the phred-scaled quality score - */ + /** @return the phred-scaled quality score */ public int getQual() { return Integer.valueOf(this.mValues.get(VCFHeader.HEADER_FIELDS.QUAL)); } @@ -156,24 +160,37 @@ public class VCFRecord { /** * get the information key-value pairs as a Map<> + * * @return a map, of the info key-value pairs */ - public Map getInfoValues() { - Map ret = new HashMap(); + public Map getInfoValues() { + Map ret = new HashMap(); String infoSplit[] = mValues.get(VCFHeader.HEADER_FIELDS.INFO).split(";"); - for (String s: infoSplit) { + for (String s : infoSplit) { String keyValue[] = s.split("="); - if (keyValue.length != 2) throw new StingException("Key value pairs must have both a key and a value; pair: " + s); - ret.put(keyValue[0],keyValue[1]); + if (keyValue.length != 2) + throw new StingException("Key value pairs must have both a key and a value; pair: " + s); + ret.put(keyValue[0], keyValue[1]); } return ret; } - /** - * - * @return the number of columnsof data we're storing - */ + /** @return the number of columnsof data we're storing */ public int getColumnCount() { - return this.mAuxValues.size() + this.mValues.size(); + return this.mGenotypeFields.size() + this.mValues.size(); } + + /** + * return the mapping of the format tags to the specified sample's values + * @param sampleName the sample name to get the genotyping tags for + * @return a VCFGenotypeRecord + */ + public VCFGenotypeRecord getVCFGenotypeRecord(String sampleName) { + if (!this.mGenotypeFields.containsKey(sampleName)) { + throw new IllegalArgumentException("Sample Name: " + sampleName + " doesn't exist in this VCF record"); + } + return new VCFGenotypeRecord(formatString,mGenotypeFields.get(sampleName),this.getAlternateAlleles(),this.getReferenceBase()); + + } + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java new 
file mode 100644 index 000000000..b37bb3929 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFValidator.java @@ -0,0 +1,57 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + + +import java.io.File; + + +/** + * @author aaron + *

+ * Class VCFValidator + *

+ * validate a VCF file + */ +public class VCFValidator { + + private static final String VCF_VERSION = "VCFv3.2"; + + /** + * about as simple as things come right now. We open the file, process all the entries in the file, + * and if no errors pop up in processing, well hey, looks good to us. + * TODO: add validation to individual records fields as they make sense + * + * @param args the vcf file is the only parameter + */ + public static void main(String[] args) { + if (args.length != 1) { + printUsage(); + return; + } + File vcfFile = new File(args[0]); + if (!vcfFile.exists()) { + System.err.println("Specified VCF file doesn't exist, please check the input file\n"); + printUsage(); + return; + } + int counter = 0; + try { + VCFReader reader = new VCFReader(vcfFile); + while (reader.hasNext()) { + counter++; + reader.next(); + } + } catch (Exception e) { + System.err.println("VCF Validation failed, after parsing " + counter + " entries."); + System.err.println("The reason given was: " + e.getMessage()); + } + System.err.println("Viewed " + counter + " VCF record entries."); + } + + public static void printUsage() { + System.err.println("VCF validator (VCF Version " + VCF_VERSION + ")"); + System.err.println("Usage:"); + System.err.println("vcfvalidator "); + System.err.println(""); + } + +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index c61444f64..c848ece17 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -41,7 +41,7 @@ public class VCFWriter { StringBuilder b = new StringBuilder(); b.append(VCFHeader.HEADER_INDICATOR); for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) b.append(field + "\t"); - for (String field : header.getAuxillaryTags()) b.append(field + "\t"); + for (String field : header.getGenotypeSamples()) b.append(field + 
"\t"); mWriter.write(b.toString() + "\n"); } catch (IOException e) { @@ -54,10 +54,9 @@ public class VCFWriter { * @param record the record to output */ public void addRecord(VCFRecord record) { - if (record.getColumnCount() != mHeader.getAuxillaryTags().size() + mHeader.getHeaderFields().size()) { + if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) { throw new StingException("Record has " + record.getColumnCount() + - " columns, when is should have " + (mHeader.getAuxillaryTags().size() + - mHeader.getHeaderFields().size())); + " columns, when is should have " + mHeader.getColumnCount()); } StringBuilder builder = new StringBuilder(); // first output the required fields in order @@ -66,7 +65,7 @@ public class VCFWriter { if (first) { first = false; builder.append(record.getValue(field)); } else builder.append("\t" + record.getValue(field)); } - for (String auxTag : mHeader.getAuxillaryTags()) { + for (String auxTag : mHeader.getGenotypeSamples()) { builder.append("\t" + record.getValue(auxTag)); } try { diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java deleted file mode 100644 index fd7f5f9f8..000000000 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMGenerator.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.utils.sam; - - -/** - * - * @author aaron - * - * Class ArtificialSAMGenerator - * - * This provides for an external utility, that creates sam files and associates fasta files - */ -public class ArtificialSAMGenerator { -} - - - - - - - - - - -class ArtificialFASTAUtils { - - - -} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java new file mode 100644 index 000000000..1ffec2762 --- /dev/null +++ 
b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java @@ -0,0 +1,83 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.BaseTest; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + + +/** + * + * @author aaron + * + * Class VCFGenotypeRecordTest + * + * A descriptions should go here. Blame aaron if it's missing. + */ +public class VCFGenotypeRecordTest extends BaseTest { + + /** + * test the basic parsing + */ + @Test + public void testBasicParsing() { + String formatString = "GT:B:C:D"; + String genotypeString = "0|1:2:3:4"; + String altAlleles[] = {"A","C","G","T"}; + char referenceBase = 'N'; + VCFGenotypeRecord rec = new VCFGenotypeRecord(formatString,genotypeString,altAlleles,referenceBase); + Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED,rec.getPhaseType()); + Assert.assertEquals(referenceBase,rec.getReference()); + Assert.assertEquals("N",rec.getAllele().get(0)); + Assert.assertEquals("A",rec.getAllele().get(1)); + Map values = rec.getFields(); + Assert.assertEquals(3,values.size()); + Assert.assertTrue(values.get("B").equals("2")); + Assert.assertTrue(values.get("C").equals("3")); + Assert.assertTrue(values.get("D").equals("4")); + } + + + /** + * test the parsing of a genotype field with missing parameters + */ + @Test + public void testMissingFieldParsing() { + String formatString = "GT:B:C:D"; + String genotypeString = "0|1:::4"; + String altAlleles[] = {"A","C","G","T"}; + char referenceBase = 'N'; + VCFGenotypeRecord rec = new VCFGenotypeRecord(formatString,genotypeString,altAlleles,referenceBase); + Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED,rec.getPhaseType()); + Assert.assertEquals(referenceBase,rec.getReference()); + Assert.assertEquals("N",rec.getAllele().get(0)); + Assert.assertEquals("A",rec.getAllele().get(1)); + Map values = rec.getFields(); + Assert.assertEquals(3,values.size()); + 
Assert.assertTrue(values.get("B").equals("")); + Assert.assertTrue(values.get("C").equals("")); + Assert.assertTrue(values.get("D").equals("4")); + } + + /** + * test the parsing of a genotype field with different missing parameters + */ + @Test + public void testMissingAllFields() { + String formatString = "GT:B:C:D"; + String genotypeString = "0|1:::"; + String altAlleles[] = {"A","C","G","T"}; + char referenceBase = 'N'; + VCFGenotypeRecord rec = new VCFGenotypeRecord(formatString,genotypeString,altAlleles,referenceBase); + Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED,rec.getPhaseType()); + Assert.assertEquals(referenceBase,rec.getReference()); + Assert.assertEquals("N",rec.getAllele().get(0)); + Assert.assertEquals("A",rec.getAllele().get(1)); + Map values = rec.getFields(); + Assert.assertEquals(3,values.size()); + Assert.assertTrue(values.get("B").equals("")); + Assert.assertTrue(values.get("C").equals("")); + Assert.assertTrue(values.get("D").equals("")); + } +} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java index 83a3231c8..b99a865d4 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java @@ -29,7 +29,7 @@ public class VCFHeaderTest extends BaseTest { for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { headerFields.add(field); } - metaData.put("one","1"); + metaData.put("format","VCFv3.2"); metaData.put("two","2"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); @@ -50,7 +50,7 @@ public class VCFHeaderTest extends BaseTest { } Assert.assertEquals(metaData.size(),index); index = 0; - for (String key: header.getAuxillaryTags()) { + for (String key: header.getGenotypeSamples()) { Assert.assertTrue(additionalColumns.contains(key)); index++; } diff --git 
a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index 44f802f3f..acd3c6f78 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -20,8 +20,9 @@ public class VCFReaderTest extends BaseTest { while (reader.hasNext()) { counter++; reader.next(); - System.err.println(counter); } Assert.assertEquals(5,counter); } + + } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index 123b632c7..417b4eae2 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -39,7 +39,6 @@ public class VCFWriterTest extends BaseTest { Assert.assertEquals(2,counter); reader.close(); fakeVCFFile.delete(); - } /** @@ -50,8 +49,9 @@ public class VCFWriterTest extends BaseTest { for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { headerFields.add(field); } - metaData.put("one", "1"); + metaData.put("format", "VCFv3.2"); // required metaData.put("two", "2"); + additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); // this should create a header that is valid @@ -60,7 +60,7 @@ public class VCFWriterTest extends BaseTest { } private VCFRecord createVCFRecord(VCFHeader header) { - int totalVals = header.getHeaderFields().size() + header.getAuxillaryTags().size(); + int totalVals = header.getColumnCount(); List array = new ArrayList(); for (int x = 0; x < totalVals; x++) { array.add(String.valueOf(x)); @@ -87,10 +87,10 @@ public class VCFWriterTest extends BaseTest { } Assert.assertEquals(metaData.size(), index); index = 0; - for (String key : header.getAuxillaryTags()) { + for (String key : 
header.getGenotypeSamples()) { Assert.assertTrue(additionalColumns.contains(key)); index++; } - Assert.assertEquals(additionalColumns.size(), index); + Assert.assertEquals(additionalColumns.size(), index+1 /* for the header field we don't see */); } }