clean-up and fixes to the VCF input

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1849 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-15 04:53:59 +00:00
parent a32470cea1
commit a9094c835c
5 changed files with 23 additions and 13 deletions

View File

@ -15,6 +15,9 @@ import java.util.Map;
* so they were broken off into their own class
*/
public class VCFGenotypeRecord {
// the symbol for an empty genotype
public static final String EMPTY_GENOTYPE = ".";
// what kind of phasing this genotype has
public enum PHASE {
UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN

View File

@ -54,6 +54,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
// setup the header fields
hInfo.put("format", "VCRv3.2");
hInfo.put("source", mSource);
hInfo.put("reference", mReferenceName);
// setup the sample names
mHeader = new VCFHeader(hInfo, sampleNames);

View File

@ -74,7 +74,7 @@ class VCFParameters {
}
// Joins the entries of formatList into a single string using Utils.join.
// NOTE(review): this text appears to be a diff rendering with the +/- markers
// stripped, so two return statements are shown back to back. Presumably the
// first (hard-coded ";") is the removed line and the second (using
// VCFRecord.FORMAT_FIELD_SEPERATOR) is its replacement -- confirm against the
// repository before relying on either.
public String getFormatString() {
return Utils.join(";", formatList);
return Utils.join(VCFRecord.FORMAT_FIELD_SEPERATOR, formatList);
}
public List<VCFGenotypeRecord> getGenotypesRecords() {

View File

@ -192,7 +192,8 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
index++;
for (String str : mHeader.getGenotypeSamples()) {
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
if (!tokens[index].equalsIgnoreCase(VCFGenotypeRecord.EMPTY_GENOTYPE))
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
index++;
}
return new VCFRecord(values, mFormatString, genotypeRecords);
@ -217,6 +218,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
List<String> bases = new ArrayList<String>();
String keyStrings[] = formatString.split(":");
for (String key : keyStrings) {
String parse;
int nextDivider;

View File

@ -7,7 +7,14 @@ import java.util.*;
/** the basic VCF record type */
public class VCFRecord {
// commonly used strings that are in the standard
public static final String FORMAT_FIELD_SEPERATOR = ":";
public static final String GENOTYPE_FIELD_SEPERATOR = ":";
public static final String FIELD_SEPERATOR = "\t";
public static final String FILTER_CODE_SEPERATOR = ";";
public static final String INFO_FIELD_SEPERATOR = ";";
public static final String EMPTY_INFO_FIELD = ".";
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
// the reference base
private char mReferenceBase;
// our contig
@ -146,10 +153,7 @@ public class VCFRecord {
*/
public boolean hasGenotypeData() {
// Reports whether mGenotypeFields holds at least one entry.
// NOTE(review): this text appears to be a diff rendering with the +/- markers
// stripped. Presumably the if/return-false/return-true form is the removed
// code and the single-expression return at the end is its replacement --
// confirm against the repository. Both forms yield true exactly when
// mGenotypeFields.size() is 1 or more.
if (mGenotypeFields.size() < 1) {
return false;
}
return true;
return (mGenotypeFields.size() > 0);
}
/** @return the string for the chromosome that this VCF record is associated with */
@ -321,14 +325,14 @@ public class VCFRecord {
String alts = "";
for (String str : this.getAlternateAlleles()) alts += str + ",";
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR);
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING,getQual()) + FIELD_SEPERATOR);
builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()) + FIELD_SEPERATOR);
String info = "";
for (String str : this.getInfoValues().keySet()) {
if (str.equals("."))
info = ".";
if (str.equals(EMPTY_INFO_FIELD))
info = EMPTY_INFO_FIELD;
else
info += str + "=" + getInfoValues().get(str) + ";";
info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR;
}
if (info.length() > 1) builder.append(info.substring(0, info.length() - 1));
@ -363,12 +367,12 @@ public class VCFRecord {
builder.append(rec.toGenotypeString(this.mAlts));
for (String s : rec.getFields().keySet()) {
if (rec.getFields().get(s).equals("")) continue;
builder.append(":");
builder.append(GENOTYPE_FIELD_SEPERATOR);
builder.append(rec.getFields().get(s));
}
gMap.remove(genotype);
} else {
builder.append(".");
builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE);
}
}
if (gMap.size() != 0) {