Permit empty fields in INFO and FORMAT structures. This is not fully tested yet, but at least the cases that previously failed now pass. Also corrected a bug where, when reading VCF 3.3 files or VCFs with no original allele encodings, we would always print 2 bases per allele.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3698 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
15a6be5d50
commit
dc4715c9c6
|
|
@ -290,10 +290,22 @@ public class VCFWriter {
|
|||
}
|
||||
|
||||
} else {
|
||||
// no original Allele information: add one common base to all alleles (reference at this location)
|
||||
trailingBases = new String(refBases);
|
||||
numTrailingBases = 1;
|
||||
position--;
|
||||
// no original Allele information: see first if all alleles have a base encoding (ie no deletions)
|
||||
// if not (some allele is empty), add one common base to all alleles (reference at this location)
|
||||
boolean hasBasesInAllAlleles = true;
|
||||
for ( Allele a : vc.getAlleles() ) {
|
||||
String alleleString = new String(a.getBases());
|
||||
if (alleleString.length()==0) {
|
||||
hasBasesInAllAlleles = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasBasesInAllAlleles) {
|
||||
trailingBases = new String(refBases);
|
||||
numTrailingBases = 1;
|
||||
position--;
|
||||
}
|
||||
}
|
||||
|
||||
for ( Allele a : vc.getAlleles() ) {
|
||||
|
|
@ -370,24 +382,33 @@ public class VCFWriter {
|
|||
} */
|
||||
|
||||
|
||||
|
||||
VCFFormatHeaderLine.FORMAT_TYPE formatType = typeUsedForFormatString.get(key);
|
||||
Object newVal;
|
||||
if (!val.getClass().equals(String.class))
|
||||
newVal = formatType.convert(String.valueOf(val));
|
||||
else
|
||||
newVal = val;
|
||||
if (typeUsedForFormatString.containsKey(key)) {
|
||||
VCFFormatHeaderLine.FORMAT_TYPE formatType = typeUsedForFormatString.get(key);
|
||||
if (!val.getClass().equals(String.class))
|
||||
newVal = formatType.convert(String.valueOf(val));
|
||||
else
|
||||
newVal = val;
|
||||
|
||||
if (numberUsedForFormatFields.get(key)>1 && val.equals(MISSING_GENOTYPE_FIELD)) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct new string with all fields.
|
||||
// for example for Number =2, string has to be ".,."
|
||||
StringBuilder v = new StringBuilder(MISSING_GENOTYPE_FIELD);
|
||||
for ( int i = 1; i < numberUsedForFormatFields.get(key); i++ ) {
|
||||
v.append(",");
|
||||
v.append(MISSING_GENOTYPE_FIELD);
|
||||
}
|
||||
newVal = v.toString();
|
||||
}
|
||||
else {
|
||||
newVal = val;
|
||||
}
|
||||
|
||||
if (numberUsedForFormatFields.containsKey(key)){
|
||||
int numInFormatField = numberUsedForFormatFields.get(key);
|
||||
if (numInFormatField>1 && val.equals(MISSING_GENOTYPE_FIELD)) {
|
||||
// If we have a missing field but multiple values are expected, we need to construct new string with all fields.
|
||||
// for example for Number =2, string has to be ".,."
|
||||
StringBuilder v = new StringBuilder(MISSING_GENOTYPE_FIELD);
|
||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||
v.append(",");
|
||||
v.append(MISSING_GENOTYPE_FIELD);
|
||||
}
|
||||
newVal = v.toString();
|
||||
}
|
||||
}
|
||||
// assume that if key is absent, given string encoding suffices.
|
||||
String outputValue = formatVCFField(key, newVal);
|
||||
|
||||
|
||||
|
|
@ -531,11 +552,17 @@ public class VCFWriter {
|
|||
isFirst = false;
|
||||
else
|
||||
info.append(INFO_FIELD_SEPARATOR);
|
||||
|
||||
info.append(entry.getKey());
|
||||
|
||||
if ( entry.getValue() != null && !entry.getValue().equals("") ) {
|
||||
int numVals = 1;
|
||||
if (this.writingVCF40Format) {
|
||||
numVals = numberUsedForInfoFields.get(entry.getKey());
|
||||
String key = entry.getKey();
|
||||
if (numberUsedForInfoFields.containsKey(key)) {
|
||||
numVals = numberUsedForInfoFields.get(key);
|
||||
}
|
||||
|
||||
// take care of unbounded encoding
|
||||
if (numVals == VCFInfoHeaderLine.UNBOUNDED)
|
||||
numVals = 1;
|
||||
|
|
|
|||
Loading…
Reference in New Issue