Wow, symbolic alleles were all busted internally and this finally bubbled up after my previous commit. For some reason we were inconsistently forcing allele trimming/padding if one was present. Not anymore.

This commit is contained in:
Eric Banks 2012-04-04 13:47:59 -04:00
parent 337ff7887a
commit 9e32a975f8
3 changed files with 23 additions and 16 deletions

View File

@ -341,9 +341,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
} catch (Exception e) { } catch (Exception e) {
generateException("the END value in the INFO field is not valid"); generateException("the END value in the INFO field is not valid");
} }
} } else if ( !isSingleNucleotideEvent(alleles) ) {
// handle multi-positional events
else if ( !isSingleNucleotideEvent(alleles) ) {
ArrayList<Allele> newAlleles = new ArrayList<Allele>(); ArrayList<Allele> newAlleles = new ArrayList<Allele>();
stop = clipAlleles(pos, ref, alleles, newAlleles, lineNo); stop = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
alleles = newAlleles; alleles = newAlleles;
@ -611,11 +609,14 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) { public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
boolean clipping = true; boolean clipping = true;
int symbolicAlleleCount = 0;
final byte ref0 = (byte)ref.charAt(0); final byte ref0 = (byte)ref.charAt(0);
for ( Allele a : unclippedAlleles ) { for ( Allele a : unclippedAlleles ) {
if ( a.isSymbolic() ) if ( a.isSymbolic() ) {
symbolicAlleleCount++;
continue; continue;
}
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
clipping = false; clipping = false;
@ -623,7 +624,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
} }
} }
return (clipping) ? 1 : 0; // don't clip if all alleles are symbolic
return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0;
} }
protected static int computeReverseClipping(List<Allele> unclippedAlleles, String ref, int forwardClipping, int lineNo) { protected static int computeReverseClipping(List<Allele> unclippedAlleles, String ref, int forwardClipping, int lineNo) {

View File

@ -1040,7 +1040,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
} }
private void validateReferencePadding() { private void validateReferencePadding() {
if (hasSymbolicAlleles()) // symbolic alleles don't need padding... if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
return; return;
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
@ -1078,7 +1078,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// if ( getReference().length() != (getLocation().size()-1) ) { // if ( getReference().length() != (getLocation().size()-1) ) {
long length = (stop - start) + 1; long length = (stop - start) + 1;
if ( (getReference().isNull() && length != 1 ) || if ( (getReference().isNull() && length != 1 ) ||
(getReference().isNonNull() && (length - getReference().length() > 1))) { (!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
} }
} }

View File

@ -140,22 +140,22 @@ public class VariantContextUtils {
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
// see if we need to pad common reference base from all alleles // see if we need to pad common reference base from all alleles
boolean padVC; boolean padVC = false;
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext. // We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention). // This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
if (inputVC.hasSymbolicAlleles()) final int referenceLength = inputVC.getReference().length();
padVC = true; if ( referenceLength == recordLength )
else if (inputVC.getReference().length() == locLength)
padVC = false; padVC = false;
else if (inputVC.getReference().length() == locLength-1) else if ( referenceLength == recordLength - 1 )
padVC = true; padVC = true;
else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + else if ( !inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
// nothing to do if we don't need to pad bases // nothing to do if we don't need to pad bases
if (padVC) { if ( padVC ) {
if ( !inputVC.hasReferenceBaseForIndel() ) if ( !inputVC.hasReferenceBaseForIndel() )
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
@ -506,6 +506,7 @@ public class VariantContextUtils {
final VariantContext first = VCs.get(0); final VariantContext first = VCs.get(0);
final String name = first.getSource(); final String name = first.getSource();
final Allele refAllele = determineReferenceAllele(VCs); final Allele refAllele = determineReferenceAllele(VCs);
Byte referenceBaseForIndel = null;
final Set<Allele> alleles = new LinkedHashSet<Allele>(); final Set<Allele> alleles = new LinkedHashSet<Allele>();
final Set<String> filters = new TreeSet<String>(); final Set<String> filters = new TreeSet<String>();
@ -530,7 +531,7 @@ public class VariantContextUtils {
// cycle through and add info from the other VCs, making sure the loc/reference matches // cycle through and add info from the other VCs, making sure the loc/reference matches
for ( final VariantContext vc : VCs ) { for ( final VariantContext vc : VCs ) {
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) ) if ( loc.getStart() != vc.getStart() )
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
if ( getLocation(genomeLocParser,vc).size() > loc.size() ) if ( getLocation(genomeLocParser,vc).size() > loc.size() )
@ -550,6 +551,9 @@ public class VariantContextUtils {
filters.addAll(vc.getFilters()); filters.addAll(vc.getFilters());
if ( referenceBaseForIndel == null )
referenceBaseForIndel = vc.getReferenceBaseForIndel();
// //
// add attributes // add attributes
// //
@ -659,6 +663,7 @@ public class VariantContextUtils {
builder.genotypes(genotypes); builder.genotypes(genotypes);
builder.log10PError(log10PError); builder.log10PError(log10PError);
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes); builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
builder.referenceBaseForIndel(referenceBaseForIndel);
// Trim the padded bases of all alleles if necessary // Trim the padded bases of all alleles if necessary
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make()); final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());