clean-up and fixes to the VCF input

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1849 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-15 04:53:59 +00:00
parent a32470cea1
commit a9094c835c
5 changed files with 23 additions and 13 deletions

View File

@ -15,6 +15,9 @@ import java.util.Map;
* so they were broken off into their own class * so they were broken off into their own class
*/ */
public class VCFGenotypeRecord { public class VCFGenotypeRecord {
// the symbol for an empty genotype
public static final String EMPTY_GENOTYPE = ".";
// what kind of phasing this genotype has // what kind of phasing this genotype has
public enum PHASE { public enum PHASE {
UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN

View File

@ -54,6 +54,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
// setup the header fields // setup the header fields
hInfo.put("format", "VCRv3.2"); hInfo.put("format", "VCRv3.2");
hInfo.put("source", mSource); hInfo.put("source", mSource);
hInfo.put("reference", mReferenceName);
// setup the sample names // setup the sample names
mHeader = new VCFHeader(hInfo, sampleNames); mHeader = new VCFHeader(hInfo, sampleNames);

View File

@ -74,7 +74,7 @@ class VCFParameters {
} }
public String getFormatString() { public String getFormatString() {
return Utils.join(";", formatList); return Utils.join(VCFRecord.FORMAT_FIELD_SEPERATOR, formatList);
} }
public List<VCFGenotypeRecord> getGenotypesRecords() { public List<VCFGenotypeRecord> getGenotypesRecords() {

View File

@ -192,7 +192,8 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>(); List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
index++; index++;
for (String str : mHeader.getGenotypeSamples()) { for (String str : mHeader.getGenotypeSamples()) {
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); if (!tokens[index].equalsIgnoreCase(VCFGenotypeRecord.EMPTY_GENOTYPE))
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
index++; index++;
} }
return new VCFRecord(values, mFormatString, genotypeRecords); return new VCFRecord(values, mFormatString, genotypeRecords);
@ -217,6 +218,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN; VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
List<String> bases = new ArrayList<String>(); List<String> bases = new ArrayList<String>();
String keyStrings[] = formatString.split(":"); String keyStrings[] = formatString.split(":");
for (String key : keyStrings) { for (String key : keyStrings) {
String parse; String parse;
int nextDivider; int nextDivider;

View File

@ -7,7 +7,14 @@ import java.util.*;
/** the basic VCF record type */ /** the basic VCF record type */
public class VCFRecord { public class VCFRecord {
// commonly used strings that are in the standard
public static final String FORMAT_FIELD_SEPERATOR = ":";
public static final String GENOTYPE_FIELD_SEPERATOR = ":";
public static final String FIELD_SEPERATOR = "\t"; public static final String FIELD_SEPERATOR = "\t";
public static final String FILTER_CODE_SEPERATOR = ";";
public static final String INFO_FIELD_SEPERATOR = ";";
public static final String EMPTY_INFO_FIELD = ".";
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
// the reference base // the reference base
private char mReferenceBase; private char mReferenceBase;
// our contig // our contig
@ -146,10 +153,7 @@ public class VCFRecord {
*/ */
public boolean hasGenotypeData() { public boolean hasGenotypeData() {
if (mGenotypeFields.size() < 1) { return (mGenotypeFields.size() > 0);
return false;
}
return true;
} }
/** @return the string for the chromosome that this VCF record is associated with */ /** @return the string for the chromosome that this VCF record is associated with */
@ -321,14 +325,14 @@ public class VCFRecord {
String alts = ""; String alts = "";
for (String str : this.getAlternateAlleles()) alts += str + ","; for (String str : this.getAlternateAlleles()) alts += str + ",";
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR); builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR); builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING,getQual()) + FIELD_SEPERATOR);
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR); builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()) + FIELD_SEPERATOR);
String info = ""; String info = "";
for (String str : this.getInfoValues().keySet()) { for (String str : this.getInfoValues().keySet()) {
if (str.equals(".")) if (str.equals(EMPTY_INFO_FIELD))
info = "."; info = EMPTY_INFO_FIELD;
else else
info += str + "=" + getInfoValues().get(str) + ";"; info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR;
} }
if (info.length() > 1) builder.append(info.substring(0, info.length() - 1)); if (info.length() > 1) builder.append(info.substring(0, info.length() - 1));
@ -363,12 +367,12 @@ public class VCFRecord {
builder.append(rec.toGenotypeString(this.mAlts)); builder.append(rec.toGenotypeString(this.mAlts));
for (String s : rec.getFields().keySet()) { for (String s : rec.getFields().keySet()) {
if (rec.getFields().get(s).equals("")) continue; if (rec.getFields().get(s).equals("")) continue;
builder.append(":"); builder.append(GENOTYPE_FIELD_SEPERATOR);
builder.append(rec.getFields().get(s)); builder.append(rec.getFields().get(s));
} }
gMap.remove(genotype); gMap.remove(genotype);
} else { } else {
builder.append("."); builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE);
} }
} }
if (gMap.size() != 0) { if (gMap.size() != 0) {