diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 46242c302..a3100030e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -154,9 +154,45 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * @return a feature, (not guaranteed complete) that has the correct start and stop */ public Feature decodeLoc(String line) { - return reallyDecode(line); + String[] locParts = new String[6]; + ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true); + + // get our alleles (because the end position depends on them) + String ref = getCachedString(locParts[3].toUpperCase()); + String alts = getCachedString(locParts[4].toUpperCase()); + List alleles = parseAlleles(ref, alts, lineNo); + + // find out our location + int start = Integer.valueOf(locParts[1]); + int stop = start; + + // ref alleles don't need to be single bases for monomorphic sites + if ( alleles.size() == 1 ) { + stop = start + alleles.get(0).length() - 1; + } else if ( !isSingleNucleotideEvent(alleles) ) { + stop = clipAlleles(start, ref, alleles, null, lineNo); + } + + return new VCFLocFeature(locParts[0], start, stop); } + private final static class VCFLocFeature implements Feature { + + final String chr; + final int start, stop; + + private VCFLocFeature(String chr, int start, int stop) { + this.chr = chr; + this.start = start; + this.stop = stop; + } + + public String getChr() { return chr; } + public int getStart() { return start; } + public int getEnd() { return stop; } + } + + /** * decode the line into a feature (VariantContext) * @param line the line @@ -207,7 +243,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, // parse out the required fields String contig = getCachedString(parts[0]); - long pos = Long.valueOf(parts[1]); + int pos = Integer.valueOf(parts[1]); String id = null; if ( parts[2].length() == 0 ) generateException("The VCF specification requires a valid ID field"); @@ -227,7 +263,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, Map attributes = parseInfo(info, id); // find out our current location, and clip the alleles down to their minimum length - long loc = pos; + int loc = pos; // ref alleles don't need to be single bases for monomorphic sites if ( alleles.size() == 1 ) { loc = pos + alleles.get(0).length() - 1; @@ -506,9 +542,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * @param ref the reference string * @param unclippedAlleles the list of unclipped alleles * @param clippedAlleles output list of clipped alleles - * @return a list of alleles, clipped to the reference + * @return the new reference end position of this event */ - protected static long clipAlleles(long position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) { + protected static int clipAlleles(int position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) { // Note that the computation of forward clipping here is meant only to see whether there is a common // base to all alleles, and to correctly compute reverse clipping, @@ -534,11 +570,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, if (clipping) reverseClipped++; } - for (Allele a : unclippedAlleles) { - if (a.isSymbolic()) { - clippedAlleles.add(a); - } else { - clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(),0,a.getBases().length-reverseClipped),a.isReference())); + if ( clippedAlleles != null ) { + for ( Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) { + clippedAlleles.add(a); + } else { + clippedAlleles.add(Allele.create(Arrays.copyOfRange(a.getBases(),0,a.getBases().length-reverseClipped),a.isReference())); + } } }