If we are subsetting alleles in the UG (either because there were too many or because some were not polymorphic), then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). Thanks to Ryan for reporting this. Only one of the integration tests had even partially covered this case, so I added one that did.
This commit is contained in:
parent
4b81c75642
commit
1f23d99dfa
|
|
@ -417,6 +417,11 @@ public class UnifiedGenotyperEngine {
|
|||
builder.attributes(attributes);
|
||||
VariantContext vcCall = builder.make();
|
||||
|
||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() )
|
||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext && rawContext.hasBasePileup() ) {
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||
|
|
|
|||
|
|
@ -617,10 +617,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
return true;
|
||||
}
|
||||
|
||||
public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
|
||||
public static int computeForwardClipping(final List<Allele> unclippedAlleles, final byte ref0) {
|
||||
boolean clipping = true;
|
||||
int symbolicAlleleCount = 0;
|
||||
final byte ref0 = (byte)ref.charAt(0);
|
||||
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
if ( a.isSymbolic() ) {
|
||||
|
|
@ -638,7 +637,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0;
|
||||
}
|
||||
|
||||
protected static int computeReverseClipping(List<Allele> unclippedAlleles, String ref, int forwardClipping, int lineNo) {
|
||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles, final byte[] ref, final int forwardClipping, final boolean allowFullClip, final int lineNo) {
|
||||
int clipping = 0;
|
||||
boolean stillClipping = true;
|
||||
|
||||
|
|
@ -650,14 +649,20 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
||||
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
||||
if ( a.length() - clipping == 0 )
|
||||
return clipping - 1;
|
||||
return clipping - (allowFullClip ? 0 : 1);
|
||||
|
||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 )
|
||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
||||
stillClipping = false;
|
||||
else if ( ref.length() == clipping )
|
||||
generateException("bad alleles encountered", lineNo);
|
||||
else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) )
|
||||
}
|
||||
else if ( ref.length == clipping ) {
|
||||
if ( allowFullClip )
|
||||
stillClipping = false;
|
||||
else
|
||||
generateException("bad alleles encountered", lineNo);
|
||||
}
|
||||
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
||||
stillClipping = false;
|
||||
}
|
||||
}
|
||||
if ( stillClipping )
|
||||
clipping++;
|
||||
|
|
@ -678,8 +683,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
*/
|
||||
protected static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
|
||||
int forwardClipping = computeForwardClipping(unclippedAlleles, ref);
|
||||
int reverseClipping = computeReverseClipping(unclippedAlleles, ref, forwardClipping, lineNo);
|
||||
int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0));
|
||||
int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo);
|
||||
|
||||
if ( clippedAlleles != null ) {
|
||||
for ( Allele a : unclippedAlleles ) {
|
||||
|
|
|
|||
|
|
@ -612,7 +612,7 @@ public class VariantContextUtils {
|
|||
continue;
|
||||
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
|
||||
if ( ! genotypes.isEmpty() )
|
||||
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
|
||||
logger.debug(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
|
||||
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
|
||||
genotypes = stripPLs(genotypes);
|
||||
// this will remove stale AC,AF attributed from vc
|
||||
|
|
@ -714,8 +714,7 @@ public class VariantContextUtils {
|
|||
else if (refAllele.isNull())
|
||||
trimVC = false;
|
||||
else {
|
||||
trimVC = (AbstractVCFCodec.computeForwardClipping(new ArrayList<Allele>(inputVC.getAlternateAlleles()),
|
||||
inputVC.getReference().getDisplayString()) > 0);
|
||||
trimVC = (AbstractVCFCodec.computeForwardClipping(inputVC.getAlternateAlleles(), (byte)inputVC.getReference().getDisplayString().charAt(0)) > 0);
|
||||
}
|
||||
|
||||
// nothing to do if we don't need to trim bases
|
||||
|
|
@ -723,9 +722,6 @@ public class VariantContextUtils {
|
|||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
// set the reference base for indels in the attributes
|
||||
Map<String,Object> attributes = new TreeMap<String,Object>(inputVC.getAttributes());
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
|
|
@ -768,12 +764,55 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(inputVC);
|
||||
return builder.alleles(alleles).genotypes(genotypes).attributes(attributes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
|
||||
return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make();
|
||||
}
|
||||
|
||||
return inputVC;
|
||||
}
|
||||
|
||||
public static VariantContext reverseTrimAlleles(VariantContext inputVC) {
|
||||
// see if we need to trim common reference base from all alleles
|
||||
|
||||
final int trimExtent = AbstractVCFCodec.computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true, -1);
|
||||
if ( trimExtent <= 0 )
|
||||
return inputVC;
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||
|
||||
for (final Allele a : inputVC.getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
alleles.add(a);
|
||||
originalToTrimmedAlleleMap.put(a, a);
|
||||
} else {
|
||||
// get bases for current allele and create a new one with trimmed bases
|
||||
byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||
Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||
alleles.add(trimmedAllele);
|
||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||
}
|
||||
}
|
||||
|
||||
// now we can recreate new genotypes with trimmed alleles
|
||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||
|
||||
List<Allele> originalAlleles = genotype.getAlleles();
|
||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||
for ( final Allele a : originalAlleles ) {
|
||||
if ( a.isCalled() )
|
||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||
else
|
||||
trimmedAlleles.add(Allele.NO_CALL);
|
||||
}
|
||||
genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles));
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length());
|
||||
return builder.alleles(alleles).genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("e948543b83bfd0640fcb994d72f8e234"));
|
||||
Arrays.asList("ec907c65da5ed9b6046404b0f81422d4"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,6 +74,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("a70593bbb5042e2d0e46e3c932cae170"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing compressed output
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class VCFCodecUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "AlleleClippingTestProvider")
|
||||
public void TestAlleleClipping(AlleleClippingTestProvider cfg) {
|
||||
int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1);
|
||||
int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false, 1);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue