diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 7becf3cfc..83b4909ff 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -16,12 +16,12 @@ import java.util.Map; */ public class VCFGenotypeRecord { // what kind of phasing this genotype has - enum GT_GENOTYPE { - UNPHASED, PHASED, PHASED_SWITCH_PROB + enum PHASE { + UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN } // our phasing - private GT_GENOTYPE phaseType; + private PHASE phaseType; // our reference bases(s) private final char mReferenceBase; @@ -31,7 +31,9 @@ public class VCFGenotypeRecord { // our mapping of the format mFields to values private final Map mFields = new HashMap(); - + + // our sample name + private final String mSampleName; /** * create a VCF record * @@ -40,12 +42,12 @@ public class VCFGenotypeRecord { * @param phasing the phasing of the the genotype * @param referenceBase the reference base */ - public VCFGenotypeRecord(Map keyValues, List Alleles, GT_GENOTYPE phasing, char referenceBase) { - // validate - this.mReferenceBase = referenceBase; - this.mFields.putAll(keyValues); - this.mAlleleBases.addAll(Alleles); - this.phaseType = phasing; + public VCFGenotypeRecord(String sampleName, Map keyValues, List Alleles, PHASE phasing, char referenceBase) { + mSampleName = sampleName; + mReferenceBase = referenceBase; + mFields.putAll(keyValues); + mAlleleBases.addAll(Alleles); + phaseType = phasing; } /** @@ -53,21 +55,21 @@ public class VCFGenotypeRecord { * * @param phase the string that contains the phase character */ - static GT_GENOTYPE determinePhase(String phase) { + static PHASE determinePhase(String phase) { // find the phasing information if (phase.equals("/")) - return GT_GENOTYPE.UNPHASED; + return PHASE.UNPHASED; else if (phase.equals("|")) - return GT_GENOTYPE.PHASED; + return PHASE.PHASED; else if (phase.equals("\\")) - return GT_GENOTYPE.PHASED_SWITCH_PROB; + return PHASE.PHASED_SWITCH_PROB; else throw new IllegalArgumentException("Unknown genotype phasing parameter"); } /** getter methods */ - public GT_GENOTYPE getPhaseType() { + public PHASE getPhaseType() { return phaseType; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java index 63266ef23..cc0682b14 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java @@ -1,11 +1,8 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.StingException; import java.util.*; -import java.util.regex.Pattern; -import java.util.regex.Matcher; /** @@ -37,19 +34,13 @@ public class VCFHeader { // the header string indicator public static final String HEADER_INDICATOR = "#"; - /** - * our log, which we use to capture anything from this class - */ + /** our log, which we use to capture anything from this class */ private static Logger logger = Logger.getLogger(VCFHeader.class); - /** - * do we have genotying data? - */ + /** do we have genotying data? */ private boolean hasGenotypingData = false; - /** - * the current vcf version we support. - */ + /** the current vcf version we support. */ private static final String VCF_VERSION = "VCFv3.2"; /** @@ -74,7 +65,10 @@ public class VCFHeader { protected VCFHeader(Set headerFields, Map metaData, List genotypeSampleNames) { for (HEADER_FIELDS field : headerFields) mHeaderFields.add(field); for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key)); - for (String col : genotypeSampleNames) mGenotypeSampleNames.add(col); + for (String col : genotypeSampleNames) { + if (!col.equals("FORMAT")) + mGenotypeSampleNames.add(col); + } hasGenotypingData = true; checkVCFVersion(); } @@ -87,10 +81,10 @@ public class VCFHeader { if (mMetaData.containsKey("format")) { if (mMetaData.get("format").equals(VCF_VERSION)) return; - throw new StingException("VCFHeader: VCF version of " + mMetaData.get("format") + + throw new RuntimeException("VCFHeader: VCF version of " + mMetaData.get("format") + " doesn't match the supported version of " + VCF_VERSION); } - throw new StingException("VCFHeader: VCF version isn't present"); + throw new RuntimeException("VCFHeader: VCF version isn't present"); } /** @@ -129,9 +123,7 @@ public class VCFHeader { return hasGenotypingData; } - /** - * @return the column count, - */ + /** @return the column count, */ public int getColumnCount() { return mHeaderFields.size() + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 459bcd161..8b47ce81d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import java.io.*; @@ -25,7 +24,7 @@ public class VCFReader implements Iterator, Iterable { private static Pattern pMeta = Pattern.compile("^" + VCFHeader.METADATA_INDICATOR + "\\s*(\\S+)\\s*=\\s*(\\S+)\\s*$"); // our pattern matching for the genotype mFields - private static final Pattern basicSplit = Pattern.compile("([0-9]*)([\\\\|\\/])([0-9]*):(\\S*)"); + private static final Pattern gtPattern = Pattern.compile("([0-9]+)([\\\\|\\/])([0-9]*)"); /** * Create a VCF reader, given a VCF file @@ -40,7 +39,7 @@ public class VCFReader implements Iterator, Iterable { new FileInputStream(vcfFile), utf8)); } catch (FileNotFoundException e) { - throw new StingException("VCFReader: Unable to find VCF file: " + vcfFile, e); + throw new RuntimeException("VCFReader: Unable to find VCF file: " + vcfFile, e); } String line = null; @@ -52,9 +51,9 @@ public class VCFReader implements Iterator, Iterable { line = mReader.readLine(); } mHeader = this.createHeader(lines); - mNextRecord = createRecord(mReader.readLine()); + mNextRecord = createRecord(line, mHeader); } catch (IOException e) { - throw new StingException("VCFReader: Failed to parse VCF File on line: " + line, e); + throw new RuntimeException("VCFReader: Failed to parse VCF File on line: " + line, e); } } @@ -74,7 +73,7 @@ public class VCFReader implements Iterator, Iterable { try { String line = mReader.readLine(); if (line == null) mNextRecord = null; - else mNextRecord = createRecord(line); + else mNextRecord = createRecord(line, mHeader); } catch (IOException e) { mNextRecord = null; } @@ -116,7 +115,7 @@ public class VCFReader implements Iterator, Iterable { String[] strings = str.substring(1).split("\\s+"); for (String s : strings) { if (headerFields.contains(s)) - throw new StingException("VCFReader: Header field duplication is not allowed"); + throw new RuntimeException("VCFReader: Header field duplication is not allowed"); try { headerFields.add(VCFHeader.HEADER_FIELDS.valueOf(s)); } catch (IllegalArgumentException e) { @@ -127,7 +126,7 @@ public class VCFReader implements Iterator, Iterable { } } if (headerFields.size() != VCFHeader.HEADER_FIELDS.values().length) { - throw new StingException("VCFReader: The VCF column header line is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size()) + throw new RuntimeException("VCFReader: The VCF column header line is missing " + (VCFHeader.HEADER_FIELDS.values().length - headerFields.size()) + " of the " + VCFHeader.HEADER_FIELDS.values().length + " required fields"); } return new VCFHeader(headerFields, metaData, auxTags); @@ -140,14 +139,14 @@ public class VCFReader implements Iterator, Iterable { * * @return the VCFRecord */ - public VCFRecord createRecord(String line) { + public static VCFRecord createRecord(String line, VCFHeader mHeader) { // things we need to make a VCF record Map values = new HashMap(); String tokens[] = line.split("\\s+"); // check to ensure that the column count of tokens is right if (tokens.length != mHeader.getColumnCount()) { - throw new StingException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has" + values.size()); + throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length); } int index = 0; @@ -159,10 +158,10 @@ public class VCFReader implements Iterator, Iterable { List genotypeRecords = new ArrayList(); index++; for (String str : mHeader.getGenotypeSamples()) { - genotypeRecords.add(getVCFGenotype(mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); + genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); index++; } - return new VCFRecord(mHeader,values,mFormatString,genotypeRecords); + return new VCFRecord(mHeader, values, mFormatString, genotypeRecords); } return new VCFRecord(mHeader, values); } @@ -170,61 +169,46 @@ public class VCFReader implements Iterator, Iterable { /** * generate a VCF genotype record, given it's format string, the genotype string, and allele info * + * @param sampleName the sample name * @param formatString the format string for this record, which contains the keys for the genotype parameters * @param genotypeString contains the phasing information, allele information, and values for genotype parameters * @param altAlleles the alternate allele string array, which we index into based on the field parameters * @param referenceBase the reference base */ - public VCFGenotypeRecord getVCFGenotype(String formatString, String genotypeString, String altAlleles[], char referenceBase) { - // check that the first format field is GT, which is required - String keys[] = formatString.split(":"); - List alleles = new ArrayList(); - if (keys.length < 0 || !keys[0].equals("GT")) - throw new IllegalArgumentException("The format string must have fields, and the first must be GT (genotype)"); + public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) { + // parameters to create the VCF genotype record + Map tagToValue = new HashMap(); + VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN; + List bases = new ArrayList(); - // find the values for each of the keys, of which the GT field should be the first - Matcher match = basicSplit.matcher(genotypeString); - if (!match.matches() || match.groupCount() < 3) - throw new IllegalArgumentException("Unable to match genotype string to expected regex"); - - // add the alternate base (which can be ref by specifying 0) - addAllele(match.group(1), altAlleles, referenceBase, alleles); - - VCFGenotypeRecord.GT_GENOTYPE phase = VCFGenotypeRecord.determinePhase(match.group(2)); - - // do we have a second alt allele? - if (match.group(3).length() > 0) { - addAllele(match.group(3), altAlleles, referenceBase, alleles); + String keyStrings[] = formatString.split(":"); + for (String key : keyStrings) { + String parse; + int nextDivider; + if (!genotypeString.contains(":")) { + nextDivider = genotypeString.length(); + parse = genotypeString; + } else { + nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":"); + parse = genotypeString.substring(0, nextDivider); + } + if (key.equals("GT")) { + Matcher m = gtPattern.matcher(parse); + if (!m.matches()) + throw new RuntimeException("Ubable to match GT genotype flag to it's regular expression"); + phase = VCFGenotypeRecord.determinePhase(m.group(2)); + addAllele(m.group(1),altAlleles,referenceBase,bases); + if (m.group(3).length() > 0) addAllele(m.group(3),altAlleles,referenceBase,bases); + } + tagToValue.put(key,parse); + if (nextDivider+1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1; + genotypeString = genotypeString.substring(nextDivider+1,genotypeString.length()); } - - Map fields = new HashMap(); - // check to see what other records we have - if (match.groupCount() == 4) { - // make sure we'll have enough occurances - String tokens[] = match.group(4).split(":{1}"); // the {1} was required, since string.split does a greedy match of the specified regex, like :+ - int keyIndex = 1; - try { - for (String token : tokens) { - fields.put(keys[keyIndex], token); - keyIndex++; - } - } - // we catch the follow exception. What this generally means is that the format string specified less mFields then the genotype string contains - catch (ArrayIndexOutOfBoundsException e) { - throw new StingException("VCFGenotypeRecord: ArrayIndexOutOfBoundsException, most likely the field list was less then the genotype " + "" + - "values provided. Format String = " + formatString + ", genotype value string = " + genotypeString, e); - } - - // you're allowed to leave out mFields, if any field doesn't have a value fill it in - if (keyIndex < tokens.length && match.group(4).contains(":")) { - while (keyIndex < keys.length) - if (!fields.containsKey(keys[keyIndex])) - fields.put(keys[keyIndex++], ""); - } - } - return new VCFGenotypeRecord(fields, alleles, phase, referenceBase); + if (keyStrings.length != tagToValue.size() || genotypeString.length() > 0) throw new RuntimeException("genotype value count doesn't match the key count"); + return new VCFGenotypeRecord(sampleName,tagToValue,bases,phase,referenceBase); } + /** * add an alternate allele to the list of alleles we have for a VCF genotype record * @@ -232,7 +216,7 @@ public class VCFReader implements Iterator, Iterable { * @param altAlleles the list of alternate alleles * @param referenceBase the reference base */ - private void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { + private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { if (Integer.valueOf(alleleNumber) == 0) bases.add(String.valueOf(referenceBase)); else diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 4fa4d0de4..65deb37fe 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broadinstitute.sting.utils.StingException; import java.util.ArrayList; import java.util.HashMap; @@ -153,7 +152,7 @@ public class VCFRecord { for (String s : infoSplit) { String keyValue[] = s.split("="); if (keyValue.length != 2) - throw new StingException("Key value pairs must have both a key and a value; pair: " + s); + throw new RuntimeException("Key value pairs must have both a key and a value; pair: " + s); ret.put(keyValue[0], keyValue[1]); } return ret; @@ -161,7 +160,8 @@ public class VCFRecord { /** @return the number of columnsof data we're storing */ public int getColumnCount() { - return mGenotypeFields.size() + mValues.size(); + if (this.hasGenotypeData()) return mGenotypeFields.size() + mValues.size(); + return mValues.size(); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index 4b07aed88..8451f5172 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -1,6 +1,5 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broadinstitute.sting.utils.StingException; import java.io.*; import java.nio.charset.Charset; @@ -13,6 +12,7 @@ public class VCFWriter { // the print stream we're writting to BufferedWriter mWriter; + private final String FIELD_SEPERATOR = "\t"; /** * create a VCF writer, given a VCF header and a file to write to @@ -29,10 +29,9 @@ public class VCFWriter { new FileOutputStream(location), utf8)); } catch (FileNotFoundException e) { - throw new StingException("Unable to create VCF file: " + location, e); + throw new RuntimeException("Unable to create VCF file: " + location, e); } try { - // write the header meta-data out for (String metadata : header.getMetaData().keySet()) { mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "=" + header.getMetaData().get(metadata) + "\n"); @@ -40,12 +39,15 @@ public class VCFWriter { // write out the column line StringBuilder b = new StringBuilder(); b.append(VCFHeader.HEADER_INDICATOR); - for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) b.append(field + "\t"); - for (String field : header.getGenotypeSamples()) b.append(field + "\t"); - mWriter.write(b.toString() + "\n"); + for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) b.append(field + FIELD_SEPERATOR); + if (header.hasGenotypingData()) { + b.append("FORMAT" + FIELD_SEPERATOR); + for (String field : header.getGenotypeSamples()) b.append(field + FIELD_SEPERATOR); + mWriter.write(b.toString() + "\n"); + } } catch (IOException e) { - throw new StingException("IOException writing the VCF header", e); + throw new RuntimeException("IOException writing the VCF header", e); } } @@ -56,7 +58,7 @@ public class VCFWriter { */ public void addRecord(VCFRecord record) { if (record.getColumnCount() != mHeader.getGenotypeSamples().size() + mHeader.getHeaderFields().size()) { - throw new StingException("Record has " + record.getColumnCount() + + throw new RuntimeException("Record has " + record.getColumnCount() + " columns, when is should have " + mHeader.getColumnCount()); } StringBuilder builder = new StringBuilder(); @@ -67,17 +69,24 @@ public class VCFWriter { if (first) { first = false; builder.append(record.getValue(field)); - } else builder.append("\t" + record.getValue(field)); + } else builder.append(FIELD_SEPERATOR + record.getValue(field)); } - for (VCFGenotypeRecord rec : record.getVCFGenotypeRecords()) { - builder.append("\t"); - for (String s : rec.getFields().keySet()) - builder.append(":" + rec.getFields().get(s)); - } - try { - mWriter.write(builder.toString() + "\n"); - } catch (IOException e) { - throw new StingException("Unable to write the VCF object to a file"); + if (record.hasGenotypeData()) { + builder.append(FIELD_SEPERATOR + record.getFormatString()); + for (VCFGenotypeRecord rec : record.getVCFGenotypeRecords()) { + builder.append(FIELD_SEPERATOR); + boolean ft = true; + for (String s : rec.getFields().keySet()) { + if (!ft) builder.append(":"); + else ft = true; + builder.append(rec.getFields().get(s)); + } + } + try { + mWriter.write(builder.toString() + "\n"); + } catch (IOException e) { + throw new RuntimeException("Unable to write the VCF object to a file"); + } } } @@ -86,7 +95,7 @@ public class VCFWriter { try { mWriter.close(); } catch (IOException e) { - throw new StingException("Unable to close VCFFile"); + throw new RuntimeException("Unable to close VCFFile"); } } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java deleted file mode 100644 index 1ffec2762..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecordTest.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.BaseTest; -import org.junit.Assert; -import org.junit.Test; - -import java.util.Map; - - -/** - * - * @author aaron - * - * Class VCFGenotypeRecordTest - * - * A descriptions should go here. Blame aaron if it's missing. - */ -public class VCFGenotypeRecordTest extends BaseTest { - - /** - * test the basic parsing - */ - @Test - public void testBasicParsing() { - String formatString = "GT:B:C:D"; - String genotypeString = "0|1:2:3:4"; - String altAlleles[] = {"A","C","G","T"}; - char referenceBase = 'N'; - VCFGenotypeRecord rec = new VCFGenotypeRecord(formatString,genotypeString,altAlleles,referenceBase); - Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED,rec.getPhaseType()); - Assert.assertEquals(referenceBase,rec.getReference()); - Assert.assertEquals("N",rec.getAllele().get(0)); - Assert.assertEquals("A",rec.getAllele().get(1)); - Map values = rec.getFields(); - Assert.assertEquals(3,values.size()); - Assert.assertTrue(values.get("B").equals("2")); - Assert.assertTrue(values.get("C").equals("3")); - Assert.assertTrue(values.get("D").equals("4")); - } - - - /** - * test the parsing of a genotype field with missing parameters - */ - @Test - public void testMissingFieldParsing() { - String formatString = "GT:B:C:D"; - String genotypeString = "0|1:::4"; - String altAlleles[] = {"A","C","G","T"}; - char referenceBase = 'N'; - VCFGenotypeRecord rec = new VCFGenotypeRecord(formatString,genotypeString,altAlleles,referenceBase); - Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED,rec.getPhaseType()); - Assert.assertEquals(referenceBase,rec.getReference()); - Assert.assertEquals("N",rec.getAllele().get(0)); - Assert.assertEquals("A",rec.getAllele().get(1)); - Map values = rec.getFields(); - Assert.assertEquals(3,values.size()); - Assert.assertTrue(values.get("B").equals("")); - Assert.assertTrue(values.get("C").equals("")); - Assert.assertTrue(values.get("D").equals("4")); - } - - /** - * test the parsing of a genotype field with different missing parameters - */ - @Test - public void testMissingAllFields() { - String formatString = "GT:B:C:D"; - String genotypeString = "0|1:::"; - String altAlleles[] = {"A","C","G","T"}; - char referenceBase = 'N'; - VCFGenotypeRecord rec = new VCFGenotypeRecord(formatString,genotypeString,altAlleles,referenceBase); - Assert.assertEquals(VCFGenotypeRecord.GT_GENOTYPE.PHASED,rec.getPhaseType()); - Assert.assertEquals(referenceBase,rec.getReference()); - Assert.assertEquals("N",rec.getAllele().get(0)); - Assert.assertEquals("A",rec.getAllele().get(1)); - Map values = rec.getFields(); - Assert.assertEquals(3,values.size()); - Assert.assertTrue(values.get("B").equals("")); - Assert.assertTrue(values.get("C").equals("")); - Assert.assertTrue(values.get("D").equals("")); - } -} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index acd3c6f78..37059a38b 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -5,6 +5,7 @@ import org.junit.Assert; import org.broadinstitute.sting.BaseTest; import java.io.File; +import java.util.Map; /** * test the VCFReader class test @@ -24,5 +25,67 @@ public class VCFReaderTest extends BaseTest { Assert.assertEquals(5,counter); } + /** + * test the basic parsing + */ + @Test + public void testBasicParsing() { + String formatString = "GT:B:C:D"; + String genotypeString = "0|1:2:3:4"; + String altAlleles[] = {"A","C","G","T"}; + char referenceBase = 'N'; + VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase); + Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType()); + Assert.assertEquals(referenceBase,rec.getReference()); + Assert.assertEquals("N",rec.getAllele().get(0)); + Assert.assertEquals("A",rec.getAllele().get(1)); + Map values = rec.getFields(); + Assert.assertEquals(4,values.size()); + Assert.assertTrue(values.get("B").equals("2")); + Assert.assertTrue(values.get("C").equals("3")); + Assert.assertTrue(values.get("D").equals("4")); + } + + /** + * test the parsing of a genotype field with missing parameters + */ + @Test + public void testMissingFieldParsing() { + String formatString = "GT:B:C:D"; + String genotypeString = "0|1:::4"; + String altAlleles[] = {"A","C","G","T"}; + char referenceBase = 'N'; + VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase); + Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType()); + Assert.assertEquals(referenceBase,rec.getReference()); + Assert.assertEquals("N",rec.getAllele().get(0)); + Assert.assertEquals("A",rec.getAllele().get(1)); + Map values = rec.getFields(); + Assert.assertEquals(4,values.size()); + Assert.assertTrue(values.get("B").equals("")); + Assert.assertTrue(values.get("C").equals("")); + Assert.assertTrue(values.get("D").equals("4")); + } + + /** + * test the parsing of a genotype field with different missing parameters + */ + @Test + public void testMissingAllFields() { + String formatString = "GT:B:C:D"; + String genotypeString = "0|1:::"; + String altAlleles[] = {"A","C","G","T"}; + char referenceBase = 'N'; + VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase); + Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType()); + Assert.assertEquals(referenceBase,rec.getReference()); + Assert.assertEquals("N",rec.getAllele().get(0)); + Assert.assertEquals("A",rec.getAllele().get(1)); + Map values = rec.getFields(); + Assert.assertEquals(4,values.size()); + Assert.assertTrue(values.get("B").equals("")); + Assert.assertTrue(values.get("C").equals("")); + Assert.assertTrue(values.get("D").equals("")); + } } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index 417b4eae2..72b15f5dc 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -54,18 +54,27 @@ public class VCFWriterTest extends BaseTest { additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); - // this should create a header that is valid - return new VCFHeader(headerFields, metaData, additionalColumns); } + /** + * create a fake VCF record + * @param header the VCF header + * @return a VCFRecord + */ private VCFRecord createVCFRecord(VCFHeader header) { - int totalVals = header.getColumnCount(); - List array = new ArrayList(); - for (int x = 0; x < totalVals; x++) { - array.add(String.valueOf(x)); + Map map = new HashMap(); + for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) + map.put(field,String.valueOf(1)); + List gt = new ArrayList(); + for (String name : header.getGenotypeSamples()) { + Map str = new HashMap(); + str.put("key","0|0"); + List alleles = new ArrayList(); + alleles.add("AAA"); + gt.add(new VCFGenotypeRecord(name,str,alleles, VCFGenotypeRecord.PHASE.PHASED,'A')); } - return new VCFRecord(header,array); + return new VCFRecord(header,map,"GT",gt); }