Sped up parseGenomeLoc(..) by replacing regexp with String.indexOf(..)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3522 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
20167fd411
commit
adc4c4e577
|
|
@ -25,25 +25,24 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import net.sf.picard.util.IntervalList;
|
|
||||||
import net.sf.picard.util.Interval;
|
|
||||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|
||||||
import org.broadinstitute.sting.utils.bed.BedParser;
|
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import net.sf.picard.util.Interval;
|
||||||
|
import net.sf.picard.util.IntervalList;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
import org.broadinstitute.sting.utils.bed.BedParser;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -55,7 +54,7 @@ import java.util.regex.Pattern;
|
||||||
public class GenomeLocParser {
|
public class GenomeLocParser {
|
||||||
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
|
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
|
||||||
|
|
||||||
private static final Pattern mPattern = Pattern.compile("([\\p{Print}&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3
|
//private static final Pattern mPattern = Pattern.compile("([\\p{Print}&&[^:]]+):*([\\d,]+)?([\\+-])?([\\d,]+)?$"); // matches case 3
|
||||||
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -171,34 +170,35 @@ public class GenomeLocParser {
|
||||||
String contig = null;
|
String contig = null;
|
||||||
long start = 1;
|
long start = 1;
|
||||||
long stop = -1;
|
long stop = -1;
|
||||||
boolean bad = false;
|
|
||||||
|
|
||||||
Matcher match = mPattern.matcher(str);
|
final int colonIndex = str.indexOf(":");
|
||||||
try {
|
if(colonIndex == -1) {
|
||||||
if (match.matches() && match.groupCount() == 4) {
|
contig = str.substring(0, str.length()); // chr1
|
||||||
if (match.group(1) != null) contig = match.group(1);
|
|
||||||
if (match.group(2) != null) start = parsePosition(match.group(2));
|
|
||||||
if ((match.group(3) != null && match.group(3).equals("+")) || // chr:1+
|
|
||||||
(match.group(3) == null && match.group(4) == null && match.group(2) == null)) // chr1
|
|
||||||
stop = Integer.MAX_VALUE;
|
stop = Integer.MAX_VALUE;
|
||||||
else if (match.group(3) != null && match.group(3).equals("-")) // chr1:1-1
|
} else {
|
||||||
stop = parsePosition(match.group(4));
|
contig = str.substring(0, colonIndex);
|
||||||
else if (match.group(3) == null && match.group(4) == null) // chr1:1
|
final int dashIndex = str.indexOf('-', colonIndex);
|
||||||
|
try {
|
||||||
|
if(dashIndex == -1) {
|
||||||
|
if(str.charAt(str.length() - 1) == '+') {
|
||||||
|
start = parsePosition(str.substring(colonIndex + 1, str.length() - 1)); // chr:1+
|
||||||
|
stop = Integer.MAX_VALUE;
|
||||||
|
} else {
|
||||||
|
start = parsePosition(str.substring(colonIndex + 1)); // chr1:1
|
||||||
stop = start;
|
stop = start;
|
||||||
else {
|
|
||||||
bad = true;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
start = parsePosition(str.substring(colonIndex + 1, dashIndex)); // chr1:1-1
|
||||||
|
stop = parsePosition(str.substring(dashIndex + 1));
|
||||||
}
|
}
|
||||||
} catch(Exception e) {
|
} catch(Exception e) {
|
||||||
bad = true;
|
throw new StingException("Failed to parse Genome Location string: " + str, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bad)
|
|
||||||
throw new StingException("Failed to parse Genome Location string: " + str);
|
|
||||||
|
|
||||||
// is the contig valid?
|
// is the contig valid?
|
||||||
if (!isContigValid(contig))
|
if (!isContigValid(contig))
|
||||||
throw new StingException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
throw new StingException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
||||||
|
|
||||||
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
|
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
|
||||||
// look up the actual stop position!
|
// look up the actual stop position!
|
||||||
|
|
@ -214,9 +214,24 @@ public class GenomeLocParser {
|
||||||
// Parsing string representations
|
// Parsing string representations
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
/**
|
||||||
|
* Parses a number like 1,000,000 into a long.
|
||||||
|
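* @param pos the position string to parse; any ',' separator characters are removed before the value is parsed as a long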
|
||||||
|
*/
|
||||||
private static long parsePosition(final String pos) {
|
private static long parsePosition(final String pos) {
|
||||||
String x = pos.replaceAll(",", "");
|
//String x = pos.replaceAll(",", ""); - this was replaced because it uses regexps
|
||||||
return Long.parseLong(x);
|
if(pos.indexOf(',') != -1) {
|
||||||
|
final StringBuilder buffer = new StringBuilder();
|
||||||
|
for(int i = 0; i < pos.length(); i++) {
|
||||||
|
final char c = pos.charAt(i);
|
||||||
|
if(c != ',') {
|
||||||
|
buffer.append(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Long.parseLong(buffer.toString());
|
||||||
|
} else {
|
||||||
|
return Long.parseLong(pos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue