Working implementation of BCF2
-- Nearly complete implementation of the spec. Slow but clean. -- Some refactoring of VariantContext to support functions common to BCF and VCF
This commit is contained in:
parent
a5193c2399
commit
c81acfc15d
|
|
@ -278,7 +278,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
getters.put("REF", new Getter() {
|
getters.put("REF", new Getter() {
|
||||||
public String get(VariantContext vc) {
|
public String get(VariantContext vc) {
|
||||||
StringBuilder x = new StringBuilder();
|
StringBuilder x = new StringBuilder();
|
||||||
x.append(getAlleleDisplayString(vc, vc.getReference()));
|
x.append(vc.getAlleleWithRefPadding(vc.getReference()));
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
@ -290,7 +290,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
for ( int i = 0; i < n; i++ ) {
|
for ( int i = 0; i < n; i++ ) {
|
||||||
if ( i != 0 ) x.append(",");
|
if ( i != 0 ) x.append(",");
|
||||||
x.append(getAlleleDisplayString(vc, vc.getAlternateAllele(i)));
|
x.append(vc.getAlleleWithRefPadding(vc.getAlternateAllele(i)));
|
||||||
}
|
}
|
||||||
return x.toString();
|
return x.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -329,22 +329,14 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
}});
|
}});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getAlleleDisplayString(VariantContext vc, Allele allele) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
if ( vc.hasReferenceBaseForIndel() && !vc.isSNP() )
|
|
||||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
|
||||||
sb.append(allele.getDisplayString());
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Object splitAltAlleles(VariantContext vc) {
|
private static Object splitAltAlleles(VariantContext vc) {
|
||||||
final int numAltAlleles = vc.getAlternateAlleles().size();
|
final int numAltAlleles = vc.getAlternateAlleles().size();
|
||||||
if ( numAltAlleles == 1 )
|
if ( numAltAlleles == 1 )
|
||||||
return getAlleleDisplayString(vc, vc.getAlternateAllele(0));
|
return vc.getAlleleWithRefPadding(vc.getAlternateAllele(0));
|
||||||
|
|
||||||
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
final List<String> alleles = new ArrayList<String>(numAltAlleles);
|
||||||
for ( Allele allele : vc.getAlternateAlleles() )
|
for ( Allele allele : vc.getAlternateAlleles() )
|
||||||
alleles.add(getAlleleDisplayString(vc, allele));
|
alleles.add(vc.getAlleleWithRefPadding(allele));
|
||||||
return alleles;
|
return alleles;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -611,7 +611,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
alleles.add(allele);
|
alleles.add(allele);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static boolean isSingleNucleotideEvent(List<Allele> alleles) {
|
public static boolean isSingleNucleotideEvent(List<Allele> alleles) {
|
||||||
for ( Allele a : alleles ) {
|
for ( Allele a : alleles ) {
|
||||||
if ( a.length() != 1 )
|
if ( a.length() != 1 )
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -683,7 +683,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
* @param lineNo the current line number in the file
|
* @param lineNo the current line number in the file
|
||||||
* @return the new reference end position of this event
|
* @return the new reference end position of this event
|
||||||
*/
|
*/
|
||||||
protected static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
public static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||||
|
|
||||||
int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0));
|
int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0));
|
||||||
int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo);
|
int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo);
|
||||||
|
|
|
||||||
|
|
@ -519,6 +519,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
return REFERENCE_BASE_FOR_INDEL;
|
return REFERENCE_BASE_FOR_INDEL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getAlleleWithRefPadding(final Allele allele) {
|
||||||
|
if ( hasReferenceBaseForIndel() && isIndel() ) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append((char)getReferenceBaseForIndel().byteValue());
|
||||||
|
sb.append(allele.getDisplayString());
|
||||||
|
return sb.toString();
|
||||||
|
} else
|
||||||
|
return allele.getDisplayString();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// get routines to access context info fields
|
// get routines to access context info fields
|
||||||
|
|
|
||||||
|
|
@ -384,6 +384,10 @@ public class VariantContextBuilder {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getStart() {
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells us that the resulting VariantContext should have the specified contig stop
|
* Tells us that the resulting VariantContext should have the specified contig stop
|
||||||
* @param stop
|
* @param stop
|
||||||
|
|
|
||||||
|
|
@ -171,21 +171,33 @@ public class VariantContextUtils {
|
||||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
/**
|
||||||
// see if we need to pad common reference base from all alleles
|
* Returns true if the alleles in inputVC should have reference bases added for padding
|
||||||
boolean padVC = false;
|
*
|
||||||
|
* We need to pad a VC with a common base if the length of the reference allele is
|
||||||
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
|
* less than the length of the VariantContext. This happens because the position of
|
||||||
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
|
* e.g. an indel is always one before the actual event (as per VCF convention).
|
||||||
|
*
|
||||||
|
* @param inputVC the VC to evaluate, cannot be null
|
||||||
|
* @return true if the alleles in inputVC should have a reference base added for padding, false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean needsPadding(final VariantContext inputVC) {
|
||||||
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
||||||
final int referenceLength = inputVC.getReference().length();
|
final int referenceLength = inputVC.getReference().length();
|
||||||
|
|
||||||
if ( referenceLength == recordLength )
|
if ( referenceLength == recordLength )
|
||||||
padVC = false;
|
return false;
|
||||||
else if ( referenceLength == recordLength - 1 )
|
else if ( referenceLength == recordLength - 1 )
|
||||||
padVC = true;
|
return true;
|
||||||
else if ( !inputVC.hasSymbolicAlleles() )
|
else if ( !inputVC.hasSymbolicAlleles() )
|
||||||
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||||
|
final boolean padVC = needsPadding(inputVC);
|
||||||
|
|
||||||
// nothing to do if we don't need to pad bases
|
// nothing to do if we don't need to pad bases
|
||||||
if ( padVC ) {
|
if ( padVC ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue