From c81acfc15d85fd9196df727c76c19a562f38ed94 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 8 May 2012 19:41:27 -0400 Subject: [PATCH] Working implementation of BCF2 -- Nearly complete on spec implementation. Slow but clean -- Some refactoring of VariantContext to support common functions for BCF and VCF --- .../walkers/variantutils/VariantsToTable.java | 16 +++-------- .../utils/codecs/vcf/AbstractVCFCodec.java | 4 +-- .../utils/variantcontext/VariantContext.java | 11 ++++++++ .../variantcontext/VariantContextBuilder.java | 4 +++ .../variantcontext/VariantContextUtils.java | 28 +++++++++++++------ 5 files changed, 41 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 46a3ba39c..7f9df6644 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -278,7 +278,7 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { StringBuilder x = new StringBuilder(); - x.append(getAlleleDisplayString(vc, vc.getReference())); + x.append(vc.getAlleleWithRefPadding(vc.getReference())); return x.toString(); } }); @@ -290,7 +290,7 @@ public class VariantsToTable extends RodWalker { for ( int i = 0; i < n; i++ ) { if ( i != 0 ) x.append(","); - x.append(getAlleleDisplayString(vc, vc.getAlternateAllele(i))); + x.append(vc.getAlleleWithRefPadding(vc.getAlternateAllele(i))); } return x.toString(); } @@ -329,22 +329,14 @@ public class VariantsToTable extends RodWalker { }}); } - private static String getAlleleDisplayString(VariantContext vc, Allele allele) { - StringBuilder sb = new StringBuilder(); - if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() ) - 
sb.append((char)vc.getReferenceBaseForIndel().byteValue()); - sb.append(allele.getDisplayString()); - return sb.toString(); - } - private static Object splitAltAlleles(VariantContext vc) { final int numAltAlleles = vc.getAlternateAlleles().size(); if ( numAltAlleles == 1 ) - return getAlleleDisplayString(vc, vc.getAlternateAllele(0)); + return vc.getAlleleWithRefPadding(vc.getAlternateAllele(0)); final List alleles = new ArrayList(numAltAlleles); for ( Allele allele : vc.getAlternateAlleles() ) - alleles.add(getAlleleDisplayString(vc, allele)); + alleles.add(vc.getAlleleWithRefPadding(allele)); return alleles; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 66ed908eb..2fcc3ec2f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -611,7 +611,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec alleles.add(allele); } - protected static boolean isSingleNucleotideEvent(List alleles) { + public static boolean isSingleNucleotideEvent(List alleles) { for ( Allele a : alleles ) { if ( a.length() != 1 ) return false; @@ -683,7 +683,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec * @param lineNo the current line number in the file * @return the new reference end position of this event */ - protected static int clipAlleles(int position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) { + public static int clipAlleles(int position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) { int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0)); int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo); diff --git 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 7af4f35ad..6a861bcd3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -519,6 +519,17 @@ public class VariantContext implements Feature { // to enable tribble integratio return REFERENCE_BASE_FOR_INDEL; } + public String getAlleleWithRefPadding(final Allele allele) { + if ( hasReferenceBaseForIndel() && isIndel() ) { + StringBuilder sb = new StringBuilder(); + sb.append((char)getReferenceBaseForIndel().byteValue()); + sb.append(allele.getDisplayString()); + return sb.toString(); + } else + return allele.getDisplayString(); + } + + // --------------------------------------------------------------------------------------------------------- // // get routines to access context info fields diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index ff66162c8..4b85bfa9b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -384,6 +384,10 @@ public class VariantContextBuilder { return this; } + public long getStart() { + return start; + } + /** * Tells us that the resulting VariantContext should have the specified contig stop * @param stop diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 92915faaf..bda5ed4a1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -171,21 +171,33 @@ public class VariantContextUtils { return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); } - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { - // see if we need to pad common reference base from all alleles - boolean padVC = false; - - // We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext. - // This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention). + /** + * Returns true if the alleles in inputVC should have reference bases added for padding + * + * We need to pad a VC with a common base if the length of the reference allele is + * less than the length of the VariantContext. This happens because the position of + * e.g. an indel is always one before the actual event (as per VCF convention). + * + * @param inputVC the VC to evaluate, cannot be null + * @return true if the reference allele is exactly one base shorter than the span of inputVC (start to end, inclusive) + */ + public static boolean needsPadding(final VariantContext inputVC) { final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1; final int referenceLength = inputVC.getReference().length(); + if ( referenceLength == recordLength ) - padVC = false; + return false; else if ( referenceLength == recordLength - 1 ) - padVC = true; + return true; else if ( !inputVC.hasSymbolicAlleles() ) throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + " in contig " + inputVC.getChr() + ". 
Reference length must be at most one base shorter than location size"); + else + return false; + } + + public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { + final boolean padVC = needsPadding(inputVC); // nothing to do if we don't need to pad bases if ( padVC ) {