From 61c07c6f9084ebf9e9361df1f43da7f55b06eb8b Mon Sep 17 00:00:00 2001 From: delangel Date: Thu, 1 Jul 2010 20:17:03 +0000 Subject: [PATCH] Fixes for missing key values that can create null pointer exceptions when reading from 3.3-generated variant contexts. Also, chop missing genotype fields correctly from right to left git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3706 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/genotype/vcf/VCFWriter.java | 56 +++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index 1730f0c22..9e824cce6 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -176,6 +176,9 @@ public class VCFWriter { if ( mHeader == null ) throw new IllegalStateException("The VCF Header must be written before records can be added"); + if (!writingVCF40Format) + throw new IllegalStateException("VCFWriter can only support add() method with a variant context if writing VCF4.0. Use VCFWriter(output, true) when constructing object"); + String vcfString = toStringEncoding(vc, mHeader, refBases); try { mWriter.write(vcfString + "\n"); @@ -357,11 +360,16 @@ public class VCFWriter { continue; - Object val = g.getAttribute(key); + Object val; + if (g.hasAttribute(key)) + val = g.getAttribute(key); + else + val = new String(MISSING_GENOTYPE_FIELD); + // some exceptions if ( key.equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) { if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 ) - val = VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY; + val = MISSING_GENOTYPE_FIELD; else { // TODO - check whether we need to saturate quality to 99 as in VCF3.3 coder. For now allow unbounded values // val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE); @@ -373,13 +381,9 @@ public class VCFWriter { if ( pileup != null ) val = pileup.size(); } else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) { - val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : PASSES_FILTERS; + // VCF 4.0 key for no filters is "." + val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : UNFILTERED; } - // TODO - do I need this? - /*else if (val == null) { - // generic case when there's no value associated with entry: - val = MISSING_GENOTYPE_FIELD; - } */ Object newVal; @@ -486,6 +490,8 @@ public class VCFWriter { * * @param builder the string builder * @param header the header object + * @param genotypeFormatString Genotype formatting string + * @param vcfAltAlleles alternate alleles at this site */ private void addGenotypeData(StringBuilder builder, VCFHeader header, String genotypeFormatString, ListvcfAltAlleles) { @@ -509,7 +515,21 @@ public class VCFWriter { tempStr.append(FIELD_SEPARATOR); if ( gMap.containsKey(genotype) ) { VCFGenotypeRecord rec = gMap.get(genotype); - tempStr.append(rec.toStringEncoding(vcfAltAlleles, genotypeFormatStrings, true)); + String genotypeString = rec.toStringEncoding(vcfAltAlleles, genotypeFormatStrings, true); + + // Override default produced genotype string when there are trailing + String[] genotypeStrings = genotypeString.split(":"); + int lastUsedPosition = 0; + for (int k=genotypeStrings.length-1; k >=1; k--) { + // see if string represents an empty field. If not, break. + if (!isEmptyField(genotypeStrings[k]) ) { + lastUsedPosition = k; + break; + } + } + // now reconstruct genotypeString from 0 to lastUsedPosition + genotypeString = Utils.join(":",genotypeStrings, 0,lastUsedPosition+1); + tempStr.append(genotypeString); gMap.remove(genotype); } else { tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings, true)); @@ -523,6 +543,21 @@ public class VCFWriter { builder.append(tempStr); } + + boolean isEmptyField(String field) { + // check if given genotype field is empty, ie either ".", or ".,.", or ".,.,.", etc. + String[] fields = field.split(","); + boolean isEmpty = true; + for (int k=0; k < fields.length; k++) { + if (!fields[k].matches(".")) { + isEmpty = false; + break; + } + + } + return isEmpty; + + } /** * create a genotype mapping from a list and their sample names * @@ -564,7 +599,8 @@ public class VCFWriter { } // take care of unbounded encoding - if (numVals == VCFInfoHeaderLine.UNBOUNDED) + // TODO - workaround for "-1" in original INFO header structure + if (numVals == VCFInfoHeaderLine.UNBOUNDED || numVals < 0) numVals = 1; }