diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index e6a3399a5..b1823531a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -15,6 +15,9 @@ import java.util.Map; * so they were broken off into their own class */ public class VCFGenotypeRecord { + // the symbol for an empty genotype + public static final String EMPTY_GENOTYPE = "."; + // what kind of phasing this genotype has public enum PHASE { UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 6e1251dae..117f22b64 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -54,6 +54,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { // setup the header fields hInfo.put("format", "VCRv3.2"); hInfo.put("source", mSource); + hInfo.put("reference", mReferenceName); // setup the sample names mHeader = new VCFHeader(hInfo, sampleNames); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java index 798dfb54c..1cd45dc4a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -74,7 +74,7 @@ class VCFParameters { } public String getFormatString() { - return Utils.join(";", formatList); + return Utils.join(VCFRecord.FORMAT_FIELD_SEPERATOR, formatList); } public List getGenotypesRecords() { diff --git 
a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 08b23d269..33c56e05d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -192,7 +192,8 @@ public class VCFReader implements Iterator, Iterable { List genotypeRecords = new ArrayList(); index++; for (String str : mHeader.getGenotypeSamples()) { - genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); + if (!tokens[index].equalsIgnoreCase(VCFGenotypeRecord.EMPTY_GENOTYPE)) + genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); index++; } return new VCFRecord(values, mFormatString, genotypeRecords); @@ -217,6 +218,7 @@ public class VCFReader implements Iterator, Iterable { VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN; List bases = new ArrayList(); String keyStrings[] = formatString.split(":"); + for (String key : keyStrings) { String parse; int nextDivider; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 0ed8c1ca1..79ea208d2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -7,7 +7,14 @@ import java.util.*; /** the basic VCF record type */ public class VCFRecord { + // commonly used strings that are in the standard + public static final String FORMAT_FIELD_SEPERATOR = ":"; + public static final String GENOTYPE_FIELD_SEPERATOR = ":"; public static final String FIELD_SEPERATOR = "\t"; + public static final String FILTER_CODE_SEPERATOR = ";"; + public 
static final String INFO_FIELD_SEPERATOR = ";"; + public static final String EMPTY_INFO_FIELD = "."; + public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; // the reference base private char mReferenceBase; // our contig @@ -146,10 +153,7 @@ public class VCFRecord { */ public boolean hasGenotypeData() { - if (mGenotypeFields.size() < 1) { - return false; - } - return true; + return (mGenotypeFields.size() > 0); } /** @return the string for the chromosome that this VCF record is associated with */ @@ -321,14 +325,14 @@ public class VCFRecord { String alts = ""; for (String str : this.getAlternateAlleles()) alts += str + ","; builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR); - builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR); - builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR); + builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING,getQual()) + FIELD_SEPERATOR); + builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()) + FIELD_SEPERATOR); String info = ""; for (String str : this.getInfoValues().keySet()) { - if (str.equals(".")) - info = "."; + if (str.equals(EMPTY_INFO_FIELD)) + info = EMPTY_INFO_FIELD; else - info += str + "=" + getInfoValues().get(str) + ";"; + info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR; } if (info.length() > 1) builder.append(info.substring(0, info.length() - 1)); @@ -363,12 +367,12 @@ public class VCFRecord { builder.append(rec.toGenotypeString(this.mAlts)); for (String s : rec.getFields().keySet()) { if (rec.getFields().get(s).equals("")) continue; - builder.append(":"); + builder.append(GENOTYPE_FIELD_SEPERATOR); builder.append(rec.getFields().get(s)); } gMap.remove(genotype); } else { - builder.append("."); + builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE); } } if (gMap.size() != 0) {