From 9a658e6b181fbf590937a750a2f8f822aee2b6e0 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 7 Jan 2010 17:51:41 +0000 Subject: [PATCH] -Fixed VCF header line bug -Added useful trim() method for Strings for characters other than whitespace git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2538 348d0f76-0448-11de-a6fe-93d51630548a --- .../org/broadinstitute/sting/utils/Utils.java | 16 ++++++++++++++++ .../utils/genotype/vcf/VCFFilterHeaderLine.java | 6 ++++-- .../utils/genotype/vcf/VCFFormatHeaderLine.java | 6 ++++-- .../utils/genotype/vcf/VCFInfoHeaderLine.java | 6 ++++-- .../sting/utils/genotype/vcf/VCFReader.java | 4 ++-- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index e0f95c22a..b89d06f92 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -429,6 +429,22 @@ public class Utils { return count; } + // trim a string for the given character (i.e. not just whitespace) + public static String trim(String str, char ch) { + char[] array = str.toCharArray(); + + + int start = 0; + while ( start < array.length && array[start] == ch ) + start++; + + int end = array.length - 1; + while ( end > start && array[end] == ch ) + end--; + + return str.substring(start, end+1); + } + public static byte listMaxByte(List quals) { if (quals.size() == 0) return 0; byte m = quals.get(0); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java index 7c6dcce4f..c50d8f824 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broadinstitute.sting.utils.Utils; + /** * @author ebanks @@ -38,10 +40,10 @@ public class VCFFilterHeaderLine extends VCFHeaderLine { throw new IllegalArgumentException("There are too few values in the VCF FILTER header line: " + line); mName = pieces[0]; - mDescription = pieces[1]; + mDescription = Utils.trim(pieces[1], '"'); // just in case there were some commas in the description for (int i = 1; i < pieces.length; i++) - mDescription += "," + pieces[i]; + mDescription += "," + Utils.trim(pieces[i], '"'); } protected String makeStringRep() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java index 23312dcd2..adfc85896 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broadinstitute.sting.utils.Utils; + /** * @author ebanks @@ -51,10 +53,10 @@ public class VCFFormatHeaderLine extends VCFHeaderLine { mName = pieces[0]; mCount = Integer.valueOf(pieces[1]); mType = INFO_TYPE.valueOf(pieces[2]); - mDescription = pieces[3]; + mDescription = Utils.trim(pieces[3], '"'); // just in case there were some commas in the description for (int i = 4; i < pieces.length; i++) - mDescription += "," + pieces[i]; + mDescription += "," + Utils.trim(pieces[i], '"'); } protected String makeStringRep() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java index e855c4487..2e05cd0b5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broadinstitute.sting.utils.Utils; + /** * @author ebanks @@ -51,10 +53,10 @@ public class VCFInfoHeaderLine extends VCFHeaderLine { mName = pieces[0]; mCount = Integer.valueOf(pieces[1]); mType = INFO_TYPE.valueOf(pieces[2]); - mDescription = pieces[3]; + mDescription = Utils.trim(pieces[3], '"'); // just in case there were some commas in the description for (int i = 4; i < pieces.length; i++) - mDescription += "," + pieces[i]; + mDescription += "," + Utils.trim(pieces[i], '"'); } protected String makeStringRep() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 761c7960d..0ab351ba6 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -172,9 +172,9 @@ public class VCFReader implements Iterator, Iterable { if ( str.startsWith("##INFO=") ) metaData.add(new VCFInfoHeaderLine(str.substring(7))); else if ( str.startsWith("##FILTER=") ) - metaData.add(new VCFFilterHeaderLine(str.substring(7))); + metaData.add(new VCFFilterHeaderLine(str.substring(9))); else if ( str.startsWith("##FORMAT=") ) - metaData.add(new VCFFormatHeaderLine(str.substring(7))); + metaData.add(new VCFFormatHeaderLine(str.substring(9))); else { int equals = str.indexOf("="); if ( equals != -1 )