From fa8963522bc31ff35998f2c6c417f6d13db72212 Mon Sep 17 00:00:00 2001 From: asivache Date: Fri, 28 Jan 2011 21:44:33 +0000 Subject: [PATCH] Ignore header line if it happens to be passed to the codec again, instead of crashing on it git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5116 348d0f76-0448-11de-a6fe-93d51630548a --- .../playground/gatk/features/maf/MafCodec.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/features/maf/MafCodec.java b/java/src/org/broadinstitute/sting/playground/gatk/features/maf/MafCodec.java index 8b45c1e97..b01baef58 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/features/maf/MafCodec.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/features/maf/MafCodec.java @@ -102,20 +102,21 @@ public class MafCodec implements FeatureCodec { if (line.startsWith("#")) return null; // split the line - String[] tokens = line.split("\\t"); + String[] tokens = line.split("\\t",-1); if ( expectedTokenCount == -1 ) { // do this only when we receive the first line and do not know the number of columns yet // we have not seen a single line yet, let's initialize the number of fields from the first line: expectedTokenCount = tokens.length; + log.info("MAF: line has "+expectedTokenCount+" fields (columns)"); if ( expectedTokenCount == 9 ) { mafType = MAF_TYPE.LITE; log.info("MAF file appears to be MAF Lite"); } else { - if ( expectedTokenCount == 63 ) { + if ( expectedTokenCount >= 63 ) { mafType = MAF_TYPE.ANNOTATED; log.info("MAF file appears to be MAF-Annotated"); } else { - log.info("MAF file has "+expectedTokenCount +" columns, unknown file type"); + log.info("MAF file has "+expectedTokenCount +" columns in first line, unknown file type"); } } if ( line.contains("Chromosome") && line.contains("Start") && line.contains("Build")) { @@ -135,11 +136,15 @@ public class MafCodec implements FeatureCodec { } - if (tokens.length != expectedTokenCount) { - log.error("MAF line contains wrong number of columns"); + if (tokens.length < expectedTokenCount) { + log.error("MAF line contains too few columns ("+tokens.length+")"); return null; } + if (tokens.length > expectedTokenCount) { + log.warn("MAF line contains more columns than expected ("+tokens.length+"); extra columns discarded"); + } + if ( tokens[CHR_COL].equals("Chromosome") ) return null; // if someone uses this codec manually and feeds it the header line multiple times... // create a new feature from the line: int start = Integer.valueOf(tokens[START_COL]);