diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index c8f8de770..0dec305d2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -341,9 +341,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { } catch (Exception e) { generateException("the END value in the INFO field is not valid"); } - } - // handle multi-positional events - else if ( !isSingleNucleotideEvent(alleles) ) { + } else if ( !isSingleNucleotideEvent(alleles) ) { ArrayList newAlleles = new ArrayList(); stop = clipAlleles(pos, ref, alleles, newAlleles, lineNo); alleles = newAlleles; @@ -611,11 +609,14 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; + int symbolicAlleleCount = 0; final byte ref0 = (byte)ref.charAt(0); for ( Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) + if ( a.isSymbolic() ) { + symbolicAlleleCount++; continue; + } if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { clipping = false; @@ -623,7 +624,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { } } - return (clipping) ? 1 : 0; + // don't clip if all alleles are symbolic + return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0; } protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index a3a841d97..5d2444b8d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1040,7 +1040,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati } private void validateReferencePadding() { - if (hasSymbolicAlleles()) // symbolic alleles don't need padding... + if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding... return; boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed @@ -1078,7 +1078,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati // if ( getReference().length() != (getLocation().size()-1) ) { long length = (stop - start) + 1; if ( (getReference().isNull() && length != 1 ) || - (getReference().isNonNull() && (length - getReference().length() > 1))) { + (!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) { throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 73a9bb6bf..cba01e889 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -140,22 +140,22 @@ public class VariantContextUtils { public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { // see if we need to pad common reference base from all alleles - boolean padVC; + boolean padVC = false; // We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext. // This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention). - long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; - if (inputVC.hasSymbolicAlleles()) - padVC = true; - else if (inputVC.getReference().length() == locLength) + final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1; + final int referenceLength = inputVC.getReference().length(); + if ( referenceLength == recordLength ) padVC = false; - else if (inputVC.getReference().length() == locLength-1) + else if ( referenceLength == recordLength - 1 ) padVC = true; - else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + + else if ( !inputVC.hasSymbolicAlleles() ) + throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); // nothing to do if we don't need to pad bases - if (padVC) { + if ( padVC ) { if ( !inputVC.hasReferenceBaseForIndel() ) throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); @@ -506,6 +506,7 @@ public class VariantContextUtils { final VariantContext first = VCs.get(0); final String name = first.getSource(); final Allele refAllele = determineReferenceAllele(VCs); + Byte referenceBaseForIndel = null; final Set alleles = new LinkedHashSet(); final Set filters = new TreeSet(); @@ -530,7 +531,7 @@ public class VariantContextUtils { // cycle through and add info from the other VCs, making sure the loc/reference matches for ( final VariantContext vc : VCs ) { - if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) ) + if ( loc.getStart() != vc.getStart() ) throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); if ( getLocation(genomeLocParser,vc).size() > loc.size() ) @@ -550,6 +551,9 @@ public class VariantContextUtils { filters.addAll(vc.getFilters()); + if ( referenceBaseForIndel == null ) + referenceBaseForIndel = vc.getReferenceBaseForIndel(); + // // add attributes // @@ -659,6 +663,7 @@ public class VariantContextUtils { builder.genotypes(genotypes); builder.log10PError(log10PError); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); + builder.referenceBaseForIndel(referenceBaseForIndel); // Trim the padded bases of all alleles if necessary final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());