Working implementation of BCF2
-- Nearly complete on spec implementation. Slow but clean -- Some refactoring of VariantContext to support common functions for BCF and VCF
This commit is contained in:
parent
a5193c2399
commit
c81acfc15d
|
|
@ -278,7 +278,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
getters.put("REF", new Getter() {
|
||||
public String get(VariantContext vc) {
|
||||
StringBuilder x = new StringBuilder();
|
||||
x.append(getAlleleDisplayString(vc, vc.getReference()));
|
||||
x.append(vc.getAlleleWithRefPadding(vc.getReference()));
|
||||
return x.toString();
|
||||
}
|
||||
});
|
||||
|
|
@ -290,7 +290,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
if ( i != 0 ) x.append(",");
|
||||
x.append(getAlleleDisplayString(vc, vc.getAlternateAllele(i)));
|
||||
x.append(vc.getAlleleWithRefPadding(vc.getAlternateAllele(i)));
|
||||
}
|
||||
return x.toString();
|
||||
}
|
||||
|
|
@ -329,22 +329,14 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
}});
|
||||
}
|
||||
|
||||
private static String getAlleleDisplayString(VariantContext vc, Allele allele) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
|
||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static Object splitAltAlleles(VariantContext vc) {
|
||||
final int numAltAlleles = vc.getAlternateAlleles().size();
|
||||
if ( numAltAlleles == 1 )
|
||||
return getAlleleDisplayString(vc, vc.getAlternateAllele(0));
|
||||
return vc.getAlleleWithRefPadding(vc.getAlternateAllele(0));
|
||||
|
||||
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
||||
for ( Allele allele : vc.getAlternateAlleles() )
|
||||
alleles.add(getAlleleDisplayString(vc, allele));
|
||||
alleles.add(vc.getAlleleWithRefPadding(allele));
|
||||
return alleles;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -611,7 +611,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
alleles.add(allele);
|
||||
}
|
||||
|
||||
protected static boolean isSingleNucleotideEvent(List<Allele> alleles) {
|
||||
public static boolean isSingleNucleotideEvent(List<Allele> alleles) {
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.length() != 1 )
|
||||
return false;
|
||||
|
|
@ -683,7 +683,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
* @param lineNo the current line number in the file
|
||||
* @return the new reference end position of this event
|
||||
*/
|
||||
protected static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
public static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
|
||||
int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0));
|
||||
int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo);
|
||||
|
|
|
|||
|
|
@ -519,6 +519,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
return REFERENCE_BASE_FOR_INDEL;
|
||||
}
|
||||
|
||||
public String getAlleleWithRefPadding(final Allele allele) {
|
||||
if ( hasReferenceBaseForIndel() && isIndel() ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append((char)getReferenceBaseForIndel().byteValue());
|
||||
sb.append(allele.getDisplayString());
|
||||
return sb.toString();
|
||||
} else
|
||||
return allele.getDisplayString();
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines to access context info fields
|
||||
|
|
|
|||
|
|
@ -384,6 +384,10 @@ public class VariantContextBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public long getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells us that the resulting VariantContext should have the specified contig stop
|
||||
* @param stop
|
||||
|
|
|
|||
|
|
@ -171,21 +171,33 @@ public class VariantContextUtils {
|
|||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
// see if we need to pad common reference base from all alleles
|
||||
boolean padVC = false;
|
||||
|
||||
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
|
||||
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
|
||||
/**
|
||||
* Returns true if the alleles in inputVC should have reference bases added for padding
|
||||
*
|
||||
* We need to pad a VC with a common base if the length of the reference allele is
|
||||
* less than the length of the VariantContext. This happens because the position of
|
||||
* e.g. an indel is always one before the actual event (as per VCF convention).
|
||||
*
|
||||
* @param inputVC the VC to evaluate, cannot be null
|
||||
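* @return true if the reference allele is exactly one base shorter than the record span (end - start + 1); false if the lengths are equal, or if they differ otherwise and inputVC has symbolic alleles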
|
||||
*/
|
||||
public static boolean needsPadding(final VariantContext inputVC) {
|
||||
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
||||
final int referenceLength = inputVC.getReference().length();
|
||||
|
||||
if ( referenceLength == recordLength )
|
||||
padVC = false;
|
||||
return false;
|
||||
else if ( referenceLength == recordLength - 1 )
|
||||
padVC = true;
|
||||
return true;
|
||||
else if ( !inputVC.hasSymbolicAlleles() )
|
||||
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
final boolean padVC = needsPadding(inputVC);
|
||||
|
||||
// nothing to do if we don't need to pad bases
|
||||
if ( padVC ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue