From e3fb4d5c70048b54285b52180a79ac901bb3907f Mon Sep 17 00:00:00 2001 From: delangel Date: Tue, 29 Jun 2010 20:57:09 +0000 Subject: [PATCH] Intermediate checkin, just to fix null pointer exception that happened when merging implementation with latest VCF4 decoder - field ORIGINAL_ALLELE_LIST in vc shouldn't be written in infoFields structure since this won't be output to file and there is no legal structure under this key. Base encoding for complex events is still brittle and most probably still has issues, fixes upcoming. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3671 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/utils/genotype/vcf/VCFWriter.java | 56 ++++++++++++++++++- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index f8e35008a..41c2c0b20 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -246,21 +246,68 @@ public class VCFWriter { alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); // convenience for lookup List vcfAltAlleles = new ArrayList(); + int refIndex=0, numTrailingBases = 0, numPaddingBases = 0; + String paddingBases = new String(""); + String trailingBases = new String(""); + + ArrayList originalAlleles = (ArrayList)vc.getAttribute("ORIGINAL_ALLELE_LIST"); + boolean isComplexEvent = false; if (referenceBases.length()>1) { // complex event: by VCF 4.0, reference from previous position is included. position--; isComplexEvent = true; } + + // search for reference allele and find trailing and padding at the end. + if (originalAlleles != null) { + Allele originalReferenceAllele = null; + for (refIndex=0; refIndex < originalAlleles.size(); refIndex++) { + originalReferenceAllele = originalAlleles.get(refIndex); + if (originalReferenceAllele.isReference()) + break; + } + + if (originalReferenceAllele == null) + throw new IllegalStateException("At least one Allele must be reference"); + + for ( Allele a : vc.getAlleles() ) { + if (a.isNonReference()) + continue; + + String refString = new String(a.getBases()); + String originalRef = new String(originalReferenceAllele.getBases()); + numTrailingBases = originalRef.indexOf(refString); + numPaddingBases = originalRef.length()-refString.length()-numTrailingBases; + if (numTrailingBases > 0) { + + position -= numTrailingBases; + trailingBases = originalRef.substring(0,numTrailingBases); + } + if (numPaddingBases > 0) + paddingBases = originalRef.substring(refString.length(),originalRef.length()-1); + + } + } else { + // no original Allele information: + if (isComplexEvent) + position--; + } + for ( Allele a : vc.getAlleles() ) { VCFGenotypeEncoding encoding; String alleleString = new String(a.getBases()); - if (isComplexEvent) {// vc.getType() == VariantContext.Type.MIXED ) { + if ( isComplexEvent ) { // Mixed variants (e.g. microsatellites) have the reference before the event included - String s = new String(refBases)+alleleString; + String s; + if (numTrailingBases > 0) { + s = trailingBases+alleleString+paddingBases; + } else { + s = new String(refBases)+alleleString; + } encoding = new VCFGenotypeEncoding(s, true); if (a.isReference()) referenceString = s; @@ -378,7 +425,10 @@ public class VCFWriter { if ( key.equals("ID") ) continue; - //TODO - fixme + // Original alleles are not for reporting but only for internal bookkeeping + if (key.equals("ORIGINAL_ALLELE_LIST")) + continue; + String outputValue = formatVCFField(key, elt.getValue()); if ( outputValue != null ) infoFields.put(key, outputValue);