Working implementation of DecodeLoc for VCF parsing. Makes indexing 3x faster.
This commit is contained in:
parent
518b3dd291
commit
2c24b68a96
|
|
@ -154,9 +154,45 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
* @return a feature, (not guaranteed complete) that has the correct start and stop
|
||||
*/
|
||||
public Feature decodeLoc(String line) {
|
||||
return reallyDecode(line);
|
||||
String[] locParts = new String[6];
|
||||
ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||
|
||||
// get our alleles (because the end position depends on them)
|
||||
String ref = getCachedString(locParts[3].toUpperCase());
|
||||
String alts = getCachedString(locParts[4].toUpperCase());
|
||||
List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
||||
|
||||
// find out our location
|
||||
int start = Integer.valueOf(locParts[1]);
|
||||
int stop = start;
|
||||
|
||||
// ref alleles don't need to be single bases for monomorphic sites
|
||||
if ( alleles.size() == 1 ) {
|
||||
stop = start + alleles.get(0).length() - 1;
|
||||
} else if ( !isSingleNucleotideEvent(alleles) ) {
|
||||
stop = clipAlleles(start, ref, alleles, null, lineNo);
|
||||
}
|
||||
|
||||
return new VCFLocFeature(locParts[0], start, stop);
|
||||
}
|
||||
|
||||
private final static class VCFLocFeature implements Feature {
|
||||
|
||||
final String chr;
|
||||
final int start, stop;
|
||||
|
||||
private VCFLocFeature(String chr, int start, int stop) {
|
||||
this.chr = chr;
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
}
|
||||
|
||||
public String getChr() { return chr; }
|
||||
public int getStart() { return start; }
|
||||
public int getEnd() { return stop; }
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* decode the line into a feature (VariantContext)
|
||||
* @param line the line
|
||||
|
|
@ -207,7 +243,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
|
||||
// parse out the required fields
|
||||
String contig = getCachedString(parts[0]);
|
||||
long pos = Long.valueOf(parts[1]);
|
||||
int pos = Integer.valueOf(parts[1]);
|
||||
String id = null;
|
||||
if ( parts[2].length() == 0 )
|
||||
generateException("The VCF specification requires a valid ID field");
|
||||
|
|
@ -227,7 +263,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
Map<String, Object> attributes = parseInfo(info, id);
|
||||
|
||||
// find out our current location, and clip the alleles down to their minimum length
|
||||
long loc = pos;
|
||||
int loc = pos;
|
||||
// ref alleles don't need to be single bases for monomorphic sites
|
||||
if ( alleles.size() == 1 ) {
|
||||
loc = pos + alleles.get(0).length() - 1;
|
||||
|
|
@ -506,9 +542,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
* @param ref the reference string
|
||||
* @param unclippedAlleles the list of unclipped alleles
|
||||
* @param clippedAlleles output list of clipped alleles
|
||||
* @return a list of alleles, clipped to the reference
|
||||
* @return the new reference end position of this event
|
||||
*/
|
||||
protected static long clipAlleles(long position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
protected static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
|
||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||
// base to all alleles, and to correctly compute reverse clipping,
|
||||
|
|
@ -534,11 +570,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
if (clipping) reverseClipped++;
|
||||
}
|
||||
|
||||
for (Allele a : unclippedAlleles) {
|
||||
if (a.isSymbolic()) {
|
||||
clippedAlleles.add(a);
|
||||
} else {
|
||||
clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(),0,a.getBases().length-reverseClipped),a.isReference()));
|
||||
if ( clippedAlleles != null ) {
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() ) {
|
||||
clippedAlleles.add(a);
|
||||
} else {
|
||||
clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(),0,a.getBases().length-reverseClipped),a.isReference()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue