Wow, symbolic alleles were all busted internally, and this finally bubbled up after my previous commit. For some reason we were inconsistently forcing allele trimming/padding whenever a symbolic allele was present. Not anymore.
This commit is contained in:
parent
337ff7887a
commit
9e32a975f8
|
|
@ -341,9 +341,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
} catch (Exception e) {
|
||||
generateException("the END value in the INFO field is not valid");
|
||||
}
|
||||
}
|
||||
// handle multi-positional events
|
||||
else if ( !isSingleNucleotideEvent(alleles) ) {
|
||||
} else if ( !isSingleNucleotideEvent(alleles) ) {
|
||||
ArrayList<Allele> newAlleles = new ArrayList<Allele>();
|
||||
stop = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
|
||||
alleles = newAlleles;
|
||||
|
|
@ -611,11 +609,14 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
|
||||
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||
boolean clipping = true;
|
||||
int symbolicAlleleCount = 0;
|
||||
final byte ref0 = (byte)ref.charAt(0);
|
||||
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() )
|
||||
if ( a.isSymbolic() ) {
|
||||
symbolicAlleleCount++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
||||
clipping = false;
|
||||
|
|
@ -623,7 +624,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
}
|
||||
}
|
||||
|
||||
return (clipping) ? 1 : 0;
|
||||
// don't clip if all alleles are symbolic
|
||||
return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0;
|
||||
}
|
||||
|
||||
protected static int computeReverseClipping(List<Allele> unclippedAlleles, String ref, int forwardClipping, int lineNo) {
|
||||
|
|
|
|||
|
|
@ -1040,7 +1040,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
private void validateReferencePadding() {
|
||||
if (hasSymbolicAlleles()) // symbolic alleles don't need padding...
|
||||
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
|
||||
return;
|
||||
|
||||
boolean needsPadding = (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed
|
||||
|
|
@ -1078,7 +1078,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
// if ( getReference().length() != (getLocation().size()-1) ) {
|
||||
long length = (stop - start) + 1;
|
||||
if ( (getReference().isNull() && length != 1 ) ||
|
||||
(getReference().isNonNull() && (length - getReference().length() > 1))) {
|
||||
(!isSymbolic() && getReference().isNonNull() && (length - getReference().length() > 1))) {
|
||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -140,22 +140,22 @@ public class VariantContextUtils {
|
|||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
// see if we need to pad common reference base from all alleles
|
||||
boolean padVC;
|
||||
boolean padVC = false;
|
||||
|
||||
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
|
||||
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
|
||||
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
|
||||
if (inputVC.hasSymbolicAlleles())
|
||||
padVC = true;
|
||||
else if (inputVC.getReference().length() == locLength)
|
||||
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
||||
final int referenceLength = inputVC.getReference().length();
|
||||
if ( referenceLength == recordLength )
|
||||
padVC = false;
|
||||
else if (inputVC.getReference().length() == locLength-1)
|
||||
else if ( referenceLength == recordLength - 1 )
|
||||
padVC = true;
|
||||
else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||
else if ( !inputVC.hasSymbolicAlleles() )
|
||||
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
||||
|
||||
// nothing to do if we don't need to pad bases
|
||||
if (padVC) {
|
||||
if ( padVC ) {
|
||||
if ( !inputVC.hasReferenceBaseForIndel() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
||||
|
||||
|
|
@ -506,6 +506,7 @@ public class VariantContextUtils {
|
|||
final VariantContext first = VCs.get(0);
|
||||
final String name = first.getSource();
|
||||
final Allele refAllele = determineReferenceAllele(VCs);
|
||||
Byte referenceBaseForIndel = null;
|
||||
|
||||
final Set<Allele> alleles = new LinkedHashSet<Allele>();
|
||||
final Set<String> filters = new TreeSet<String>();
|
||||
|
|
@ -530,7 +531,7 @@ public class VariantContextUtils {
|
|||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||
|
||||
for ( final VariantContext vc : VCs ) {
|
||||
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
||||
if ( loc.getStart() != vc.getStart() )
|
||||
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
||||
|
||||
if ( getLocation(genomeLocParser,vc).size() > loc.size() )
|
||||
|
|
@ -550,6 +551,9 @@ public class VariantContextUtils {
|
|||
|
||||
filters.addAll(vc.getFilters());
|
||||
|
||||
if ( referenceBaseForIndel == null )
|
||||
referenceBaseForIndel = vc.getReferenceBaseForIndel();
|
||||
|
||||
//
|
||||
// add attributes
|
||||
//
|
||||
|
|
@ -659,6 +663,7 @@ public class VariantContextUtils {
|
|||
builder.genotypes(genotypes);
|
||||
builder.log10PError(log10PError);
|
||||
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
||||
builder.referenceBaseForIndel(referenceBaseForIndel);
|
||||
|
||||
// Trim the padded bases of all alleles if necessary
|
||||
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
|
||||
|
|
|
|||
Loading…
Reference in New Issue