Working implementation of BCF2

-- Nearly complete on-spec implementation. Slow but clean.
-- Some refactoring of VariantContext to support common functions for BCF and VCF
This commit is contained in:
Mark DePristo 2012-05-08 19:41:27 -04:00
parent a5193c2399
commit c81acfc15d
5 changed files with 41 additions and 22 deletions

View File

@ -278,7 +278,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
getters.put("REF", new Getter() {
public String get(VariantContext vc) {
StringBuilder x = new StringBuilder();
x.append(getAlleleDisplayString(vc, vc.getReference()));
x.append(vc.getAlleleWithRefPadding(vc.getReference()));
return x.toString();
}
});
@ -290,7 +290,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
for ( int i = 0; i < n; i++ ) {
if ( i != 0 ) x.append(",");
x.append(getAlleleDisplayString(vc, vc.getAlternateAllele(i)));
x.append(vc.getAlleleWithRefPadding(vc.getAlternateAllele(i)));
}
return x.toString();
}
@ -329,22 +329,14 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
}});
}
/**
 * Builds the string shown for a single allele of a variant context.
 *
 * When {@code vc} reports a reference base for indels and is not a SNP, that
 * base (converted from its byte value to a char) is placed in front of the
 * allele's display string; otherwise the display string is returned as is.
 *
 * @param vc     the variant context the allele belongs to
 * @param allele the allele to render
 * @return the allele's display string, optionally prefixed with the reference base
 */
private static String getAlleleDisplayString(VariantContext vc, Allele allele) {
    final boolean prependRefBase = vc.hasReferenceBaseForIndel() && !vc.isSNP();
    if ( !prependRefBase )
        return allele.getDisplayString();

    // char + String is string concatenation, so the base ends up as the first character
    final char refBase = (char) vc.getReferenceBaseForIndel().byteValue();
    return refBase + allele.getDisplayString();
}
/**
 * Renders the alternate alleles of a variant context as display strings.
 *
 * With exactly one alternate allele the result is that allele's string;
 * otherwise it is a {@code List<String>} holding one entry per alternate
 * allele, in the iteration order of {@code vc.getAlternateAlleles()}.
 *
 * NOTE(review): each edited statement below appears twice, once calling
 * getAlleleDisplayString and once calling vc.getAlleleWithRefPadding; this
 * looks like the before/after lines of a change shown together -- confirm
 * which variant is the live one.
 *
 * @param vc the variant context whose alternate alleles are rendered
 * @return a String (single alternate allele) or a List of Strings (any other count)
 */
private static Object splitAltAlleles(VariantContext vc) {
final int numAltAlleles = vc.getAlternateAlleles().size();
// single alternate allele: hand back a bare string rather than a one-element list
if ( numAltAlleles == 1 )
return getAlleleDisplayString(vc, vc.getAlternateAllele(0));
return vc.getAlleleWithRefPadding(vc.getAlternateAllele(0));
// any other count: collect one rendered string per alternate allele
final List<String> alleles = new ArrayList<String>(numAltAlleles);
for ( Allele allele : vc.getAlternateAlleles() )
alleles.add(getAlleleDisplayString(vc, allele));
alleles.add(vc.getAlleleWithRefPadding(allele));
return alleles;
}
}

View File

@ -611,7 +611,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
alleles.add(allele);
}
protected static boolean isSingleNucleotideEvent(List<Allele> alleles) {
public static boolean isSingleNucleotideEvent(List<Allele> alleles) {
for ( Allele a : alleles ) {
if ( a.length() != 1 )
return false;
@ -683,7 +683,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
* @param lineNo the current line number in the file
* @return the new reference end position of this event
*/
protected static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
public static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0));
int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo);

View File

@ -519,6 +519,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
return REFERENCE_BASE_FOR_INDEL;
}
/**
 * Returns the display string of an allele, prefixed with this context's
 * reference base for indels when applicable.
 *
 * The prefix is added only when this context has a reference base for
 * indels and is itself an indel; in every other case the allele's plain
 * display string is returned.
 *
 * @param allele the allele to render
 * @return the allele's display string, optionally preceded by the reference base
 */
public String getAlleleWithRefPadding(final Allele allele) {
    // no padding base available, or not an indel: nothing to prepend
    if ( !hasReferenceBaseForIndel() || !isIndel() )
        return allele.getDisplayString();

    // char + String is string concatenation, so the base ends up as the first character
    final char refBase = (char) getReferenceBaseForIndel().byteValue();
    return refBase + allele.getDisplayString();
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines to access context info fields

View File

@ -384,6 +384,10 @@ public class VariantContextBuilder {
return this;
}
/**
 * Returns the start value currently held by this builder.
 *
 * @return the value of the {@code start} field
 */
public long getStart() {
    return this.start;
}
/**
* Tells us that the resulting VariantContext should have the specified contig stop
* @param stop

View File

@ -171,21 +171,33 @@ public class VariantContextUtils {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
// see if we need to pad common reference base from all alleles
boolean padVC = false;
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
/**
* Returns true if the alleles in inputVC should have reference bases added for padding
*
* We need to pad a VC with a common base if the length of the reference allele is
* less than the length of the VariantContext. This happens because the position of
* e.g. an indel is always one before the actual event (as per VCF convention).
*
* @param inputVC the VC to evaluate, cannot be null
* @return true if the reference allele is exactly one base shorter than the record
*         length (end - start + 1); false if the two lengths are equal, or if they
*         differ in any other way but inputVC has symbolic alleles
* @throws IllegalArgumentException if the reference length is neither equal to nor
*         one less than the record length and inputVC has no symbolic alleles
*/
public static boolean needsPadding(final VariantContext inputVC) {
// number of positions spanned by the record, inclusive of both ends
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
final int referenceLength = inputVC.getReference().length();
if ( referenceLength == recordLength )
padVC = false;
return false;
else if ( referenceLength == recordLength - 1 )
padVC = true;
return true;
else if ( !inputVC.hasSymbolicAlleles() )
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
else
return false;
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
final boolean padVC = needsPadding(inputVC);
// nothing to do if we don't need to pad bases
if ( padVC ) {