clean-up and fixes to the VCF input
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1849 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a32470cea1
commit
a9094c835c
|
|
@ -15,6 +15,9 @@ import java.util.Map;
|
|||
* so they were broken off into their own class
|
||||
*/
|
||||
public class VCFGenotypeRecord {
|
||||
// the symbol for an empty genotype
|
||||
public static final String EMPTY_GENOTYPE = ".";
|
||||
|
||||
// what kind of phasing this genotype has
|
||||
public enum PHASE {
|
||||
UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
// setup the header fields
|
||||
hInfo.put("format", "VCRv3.2");
|
||||
hInfo.put("source", mSource);
|
||||
hInfo.put("reference", mReferenceName);
|
||||
|
||||
// setup the sample names
|
||||
mHeader = new VCFHeader(hInfo, sampleNames);
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class VCFParameters {
|
|||
}
|
||||
|
||||
public String getFormatString() {
|
||||
return Utils.join(";", formatList);
|
||||
return Utils.join(VCFRecord.FORMAT_FIELD_SEPERATOR, formatList);
|
||||
}
|
||||
|
||||
public List<VCFGenotypeRecord> getGenotypesRecords() {
|
||||
|
|
|
|||
|
|
@ -192,7 +192,8 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||
index++;
|
||||
for (String str : mHeader.getGenotypeSamples()) {
|
||||
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
|
||||
if (!tokens[index].equalsIgnoreCase(VCFGenotypeRecord.EMPTY_GENOTYPE))
|
||||
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
|
||||
index++;
|
||||
}
|
||||
return new VCFRecord(values, mFormatString, genotypeRecords);
|
||||
|
|
@ -217,6 +218,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
|
||||
List<String> bases = new ArrayList<String>();
|
||||
String keyStrings[] = formatString.split(":");
|
||||
|
||||
for (String key : keyStrings) {
|
||||
String parse;
|
||||
int nextDivider;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,14 @@ import java.util.*;
|
|||
|
||||
/** the basic VCF record type */
|
||||
public class VCFRecord {
|
||||
// commonly used strings that are in the standard
|
||||
public static final String FORMAT_FIELD_SEPERATOR = ":";
|
||||
public static final String GENOTYPE_FIELD_SEPERATOR = ":";
|
||||
public static final String FIELD_SEPERATOR = "\t";
|
||||
public static final String FILTER_CODE_SEPERATOR = ";";
|
||||
public static final String INFO_FIELD_SEPERATOR = ";";
|
||||
public static final String EMPTY_INFO_FIELD = ".";
|
||||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
// the reference base
|
||||
private char mReferenceBase;
|
||||
// our contig
|
||||
|
|
@ -146,10 +153,7 @@ public class VCFRecord {
|
|||
*/
|
||||
|
||||
public boolean hasGenotypeData() {
|
||||
if (mGenotypeFields.size() < 1) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return (mGenotypeFields.size() > 0);
|
||||
}
|
||||
|
||||
/** @return the string for the chromosome that this VCF record is associated with */
|
||||
|
|
@ -321,14 +325,14 @@ public class VCFRecord {
|
|||
String alts = "";
|
||||
for (String str : this.getAlternateAlleles()) alts += str + ",";
|
||||
builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "." + FIELD_SEPERATOR);
|
||||
builder.append(String.format("%.2f",getQual()) + FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
|
||||
builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING,getQual()) + FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()) + FIELD_SEPERATOR);
|
||||
String info = "";
|
||||
for (String str : this.getInfoValues().keySet()) {
|
||||
if (str.equals("."))
|
||||
info = ".";
|
||||
if (str.equals(EMPTY_INFO_FIELD))
|
||||
info = EMPTY_INFO_FIELD;
|
||||
else
|
||||
info += str + "=" + getInfoValues().get(str) + ";";
|
||||
info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR;
|
||||
}
|
||||
|
||||
if (info.length() > 1) builder.append(info.substring(0, info.length() - 1));
|
||||
|
|
@ -363,12 +367,12 @@ public class VCFRecord {
|
|||
builder.append(rec.toGenotypeString(this.mAlts));
|
||||
for (String s : rec.getFields().keySet()) {
|
||||
if (rec.getFields().get(s).equals("")) continue;
|
||||
builder.append(":");
|
||||
builder.append(GENOTYPE_FIELD_SEPERATOR);
|
||||
builder.append(rec.getFields().get(s));
|
||||
}
|
||||
gMap.remove(genotype);
|
||||
} else {
|
||||
builder.append(".");
|
||||
builder.append(VCFGenotypeRecord.EMPTY_GENOTYPE);
|
||||
}
|
||||
}
|
||||
if (gMap.size() != 0) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue