Fixes for missing key values that can create null pointer exceptions when reading from 3.3-generated variant contexts. Also, chop missing genotype fields correctly from right to left
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3706 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
87470d5fe5
commit
61c07c6f90
|
|
@ -176,6 +176,9 @@ public class VCFWriter {
|
||||||
if ( mHeader == null )
|
if ( mHeader == null )
|
||||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||||
|
|
||||||
|
if (!writingVCF40Format)
|
||||||
|
throw new IllegalStateException("VCFWriter can only support add() method with a variant context if writing VCF4.0. Use VCFWriter(output, true) when constructing object");
|
||||||
|
|
||||||
String vcfString = toStringEncoding(vc, mHeader, refBases);
|
String vcfString = toStringEncoding(vc, mHeader, refBases);
|
||||||
try {
|
try {
|
||||||
mWriter.write(vcfString + "\n");
|
mWriter.write(vcfString + "\n");
|
||||||
|
|
@ -357,11 +360,16 @@ public class VCFWriter {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
||||||
Object val = g.getAttribute(key);
|
Object val;
|
||||||
|
if (g.hasAttribute(key))
|
||||||
|
val = g.getAttribute(key);
|
||||||
|
else
|
||||||
|
val = new String(MISSING_GENOTYPE_FIELD);
|
||||||
|
|
||||||
// some exceptions
|
// some exceptions
|
||||||
if ( key.equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) {
|
if ( key.equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) {
|
||||||
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
|
if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 )
|
||||||
val = VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY;
|
val = MISSING_GENOTYPE_FIELD;
|
||||||
else {
|
else {
|
||||||
// TODO - check whether we need to saturate quality to 99 as in VCF3.3 coder. For now allow unbounded values
|
// TODO - check whether we need to saturate quality to 99 as in VCF3.3 coder. For now allow unbounded values
|
||||||
// val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE);
|
// val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE);
|
||||||
|
|
@ -373,13 +381,9 @@ public class VCFWriter {
|
||||||
if ( pileup != null )
|
if ( pileup != null )
|
||||||
val = pileup.size();
|
val = pileup.size();
|
||||||
} else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) {
|
} else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) {
|
||||||
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : PASSES_FILTERS;
|
// VCF 4.0 key for no filters is "."
|
||||||
|
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : UNFILTERED;
|
||||||
}
|
}
|
||||||
// TODO - do I need this?
|
|
||||||
/*else if (val == null) {
|
|
||||||
// generic case when there's no value associated with entry:
|
|
||||||
val = MISSING_GENOTYPE_FIELD;
|
|
||||||
} */
|
|
||||||
|
|
||||||
|
|
||||||
Object newVal;
|
Object newVal;
|
||||||
|
|
@ -486,6 +490,8 @@ public class VCFWriter {
|
||||||
*
|
*
|
||||||
* @param builder the string builder
|
* @param builder the string builder
|
||||||
* @param header the header object
|
* @param header the header object
|
||||||
|
* @param genotypeFormatString Genotype formatting string
|
||||||
|
* @param vcfAltAlleles alternate alleles at this site
|
||||||
*/
|
*/
|
||||||
private void addGenotypeData(StringBuilder builder, VCFHeader header,
|
private void addGenotypeData(StringBuilder builder, VCFHeader header,
|
||||||
String genotypeFormatString, List<VCFGenotypeEncoding>vcfAltAlleles) {
|
String genotypeFormatString, List<VCFGenotypeEncoding>vcfAltAlleles) {
|
||||||
|
|
@ -509,7 +515,21 @@ public class VCFWriter {
|
||||||
tempStr.append(FIELD_SEPARATOR);
|
tempStr.append(FIELD_SEPARATOR);
|
||||||
if ( gMap.containsKey(genotype) ) {
|
if ( gMap.containsKey(genotype) ) {
|
||||||
VCFGenotypeRecord rec = gMap.get(genotype);
|
VCFGenotypeRecord rec = gMap.get(genotype);
|
||||||
tempStr.append(rec.toStringEncoding(vcfAltAlleles, genotypeFormatStrings, true));
|
String genotypeString = rec.toStringEncoding(vcfAltAlleles, genotypeFormatStrings, true);
|
||||||
|
|
||||||
|
// Override default produced genotype string when there are trailing
|
||||||
|
String[] genotypeStrings = genotypeString.split(":");
|
||||||
|
int lastUsedPosition = 0;
|
||||||
|
for (int k=genotypeStrings.length-1; k >=1; k--) {
|
||||||
|
// see if string represents an empty field. If not, break.
|
||||||
|
if (!isEmptyField(genotypeStrings[k]) ) {
|
||||||
|
lastUsedPosition = k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// now reconstruct genotypeString from 0 to lastUsedPosition
|
||||||
|
genotypeString = Utils.join(":",genotypeStrings, 0,lastUsedPosition+1);
|
||||||
|
tempStr.append(genotypeString);
|
||||||
gMap.remove(genotype);
|
gMap.remove(genotype);
|
||||||
} else {
|
} else {
|
||||||
tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings, true));
|
tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings, true));
|
||||||
|
|
@ -523,6 +543,21 @@ public class VCFWriter {
|
||||||
|
|
||||||
builder.append(tempStr);
|
builder.append(tempStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean isEmptyField(String field) {
|
||||||
|
// check if given genotype field is empty, ie either ".", or ".,.", or ".,.,.", etc.
|
||||||
|
String[] fields = field.split(",");
|
||||||
|
boolean isEmpty = true;
|
||||||
|
for (int k=0; k < fields.length; k++) {
|
||||||
|
if (!fields[k].matches(".")) {
|
||||||
|
isEmpty = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return isEmpty;
|
||||||
|
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* create a genotype mapping from a list and their sample names
|
* create a genotype mapping from a list and their sample names
|
||||||
*
|
*
|
||||||
|
|
@ -564,7 +599,8 @@ public class VCFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// take care of unbounded encoding
|
// take care of unbounded encoding
|
||||||
if (numVals == VCFInfoHeaderLine.UNBOUNDED)
|
// TODO - workaround for "-1" in original INFO header structure
|
||||||
|
if (numVals == VCFInfoHeaderLine.UNBOUNDED || numVals < 0)
|
||||||
numVals = 1;
|
numVals = 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue