Wow, symbolic alleles were all busted internally, and this finally bubbled up after my previous commit. For some reason we were inconsistently forcing allele trimming/padding whenever a symbolic allele was present. Not anymore.

This commit is contained in:
Eric Banks 2012-04-04 13:47:59 -04:00
parent 337ff7887a
commit 9e32a975f8
3 changed files with 23 additions and 16 deletions

View File

@ -341,9 +341,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
} catch (Exception e) {
generateException("the END value in the INFO field is not valid");
}
}
// handle multi-positional events
else if ( !isSingleNucleotideEvent(alleles) ) {
} else if ( !isSingleNucleotideEvent(alleles) ) {
ArrayList<Allele> newAlleles = new ArrayList<Allele>();
stop = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
alleles = newAlleles;
@ -611,11 +609,14 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
boolean clipping = true;
int symbolicAlleleCount = 0;
final byte ref0 = (byte)ref.charAt(0);
for ( Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
if ( a.isSymbolic() ) {
symbolicAlleleCount++;
continue;
}
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
clipping = false;
@ -623,7 +624,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
}
}
return (clipping) ? 1 : 0;
// don't clip if all alleles are symbolic
return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0;
}
protected static int computeReverseClipping(List<Allele> unclippedAlleles, String ref, int forwardClipping, int lineNo) {

View File

@ -1040,7 +1040,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
private void validateReferencePadding() {
if (hasSymbolicAlleles()) // symbolic alleles don't need padding...
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
return;
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
@ -1078,7 +1078,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// if ( getReference().length() != (getLocation().size()-1) ) {
long length = (stop - start) + 1;
if ( (getReference().isNull() && length != 1 ) ||
(getReference().isNonNull() && (length - getReference().length() > 1))) {
(!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}

View File

@ -140,22 +140,22 @@ public class VariantContextUtils {
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
// see if we need to pad common reference base from all alleles
boolean padVC;
boolean padVC = false;
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
if (inputVC.hasSymbolicAlleles())
padVC = true;
else if (inputVC.getReference().length() == locLength)
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
final int referenceLength = inputVC.getReference().length();
if ( referenceLength == recordLength )
padVC = false;
else if (inputVC.getReference().length() == locLength-1)
else if ( referenceLength == recordLength - 1 )
padVC = true;
else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
else if ( !inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
// nothing to do if we don't need to pad bases
if (padVC) {
if ( padVC ) {
if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
@ -506,6 +506,7 @@ public class VariantContextUtils {
final VariantContext first = VCs.get(0);
final String name = first.getSource();
final Allele refAllele = determineReferenceAllele(VCs);
Byte referenceBaseForIndel = null;
final Set<Allele> alleles = new LinkedHashSet<Allele>();
final Set<String> filters = new TreeSet<String>();
@ -530,7 +531,7 @@ public class VariantContextUtils {
// cycle through and add info from the other VCs, making sure the loc/reference matches
for ( final VariantContext vc : VCs ) {
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
if ( loc.getStart() != vc.getStart() )
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
if ( getLocation(genomeLocParser,vc).size() > loc.size() )
@ -550,6 +551,9 @@ public class VariantContextUtils {
filters.addAll(vc.getFilters());
if ( referenceBaseForIndel == null )
referenceBaseForIndel = vc.getReferenceBaseForIndel();
//
// add attributes
//
@ -659,6 +663,7 @@ public class VariantContextUtils {
builder.genotypes(genotypes);
builder.log10PError(log10PError);
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
builder.referenceBaseForIndel(referenceBaseForIndel);
// Trim the padded bases of all alleles if necessary
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());