diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 94d340926..caa3a6b6b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -417,6 +417,11 @@ public class UnifiedGenotyperEngine { builder.attributes(attributes); VariantContext vcCall = builder.make(); + // if we are subsetting alleles (either because there were too many or because some were not polymorphic) + // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). + if ( myAlleles.size() != vc.getAlleles().size() ) + vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); + if ( annotationEngine != null && !limitedContext && rawContext.hasBasePileup() ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations final ReadBackedPileup pileup = rawContext.getBasePileup(); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index a1127e35d..c2cbf23fb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -617,10 +617,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { return true; } - public static int computeForwardClipping(List unclippedAlleles, String ref) { + public static int computeForwardClipping(final List unclippedAlleles, final byte ref0) { boolean clipping = true; int symbolicAlleleCount = 0; - final byte ref0 = (byte)ref.charAt(0); for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) { @@ -638,7 +637,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0; } - protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { + public static int computeReverseClipping(final List unclippedAlleles, final byte[] ref, final int forwardClipping, final boolean allowFullClip, final int lineNo) { int clipping = 0; boolean stillClipping = true; @@ -650,14 +649,20 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). if ( a.length() - clipping == 0 ) - return clipping - 1; + return clipping - (allowFullClip ? 0 : 1); - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { stillClipping = false; - else if ( ref.length() == clipping ) - generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) ) + } + else if ( ref.length == clipping ) { + if ( allowFullClip ) + stillClipping = false; + else + generateException("bad alleles encountered", lineNo); + } + else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { stillClipping = false; + } } if ( stillClipping ) clipping++; @@ -678,8 +683,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { */ protected static int clipAlleles(int position, String ref, List unclippedAlleles, List clippedAlleles, int lineNo) { - int forwardClipping = computeForwardClipping(unclippedAlleles, ref); - int reverseClipping = computeReverseClipping(unclippedAlleles, ref, forwardClipping, lineNo); + int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0)); + int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo); if ( clippedAlleles != null ) { for ( Allele a : unclippedAlleles ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index cbaf705b4..de5deef57 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -612,7 +612,7 @@ public class VariantContextUtils { continue; if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) { if ( ! genotypes.isEmpty() ) - logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", + logger.debug(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); genotypes = stripPLs(genotypes); // this will remove stale AC,AF attributed from vc @@ -714,8 +714,7 @@ public class VariantContextUtils { else if (refAllele.isNull()) trimVC = false; else { - trimVC = (AbstractVCFCodec.computeForwardClipping(new ArrayList(inputVC.getAlternateAlleles()), - inputVC.getReference().getDisplayString()) > 0); + trimVC = (AbstractVCFCodec.computeForwardClipping(inputVC.getAlternateAlleles(), (byte)inputVC.getReference().getDisplayString().charAt(0)) > 0); } // nothing to do if we don't need to trim bases @@ -723,9 +722,6 @@ public class VariantContextUtils { List alleles = new ArrayList(); GenotypesContext genotypes = GenotypesContext.create(); - // set the reference base for indels in the attributes - Map attributes = new TreeMap(inputVC.getAttributes()); - Map originalToTrimmedAlleleMap = new HashMap(); for (final Allele a : inputVC.getAlleles()) { @@ -768,12 +764,55 @@ public class VariantContextUtils { } final VariantContextBuilder builder = new VariantContextBuilder(inputVC); - return builder.alleles(alleles).genotypes(genotypes).attributes(attributes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make(); + return builder.alleles(alleles).genotypes(genotypes).referenceBaseForIndel(new Byte(inputVC.getReference().getBases()[0])).make(); } return inputVC; } + public static VariantContext reverseTrimAlleles(VariantContext inputVC) { + // see if we need to trim common reference base from all alleles + + final int trimExtent = AbstractVCFCodec.computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true, -1); + if ( trimExtent <= 0 ) + return inputVC; + + final List alleles = new ArrayList(); + GenotypesContext genotypes = GenotypesContext.create(); + + Map originalToTrimmedAlleleMap = new HashMap(); + + for (final Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // now we can recreate new genotypes with trimmed alleles + for ( final Genotype genotype : inputVC.getGenotypes() ) { + + List originalAlleles = genotype.getAlleles(); + List trimmedAlleles = new ArrayList(); + for ( final Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.add(Genotype.modifyAlleles(genotype, trimmedAlleles)); + } + + final VariantContextBuilder builder = new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length()); + return builder.alleles(alleles).genotypes(genotypes).make(); + } + public static GenotypesContext stripPLs(GenotypesContext genotypes) { GenotypesContext newGs = GenotypesContext.create(genotypes.size()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 067e9088c..e95284190 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1, - Arrays.asList("e948543b83bfd0640fcb994d72f8e234")); + Arrays.asList("ec907c65da5ed9b6046404b0f81422d4")); executeTest("test Multiple SNP alleles", spec); } @@ -74,6 +74,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test bad read", spec); } + @Test + public void testReverseTrim() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, + Arrays.asList("a70593bbb5042e2d0e46e3c932cae170")); + executeTest("test reverse trim", spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing compressed output diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java index 7681ed7d1..e0fb1b876 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java @@ -85,7 +85,7 @@ public class VCFCodecUnitTest extends BaseTest { @Test(dataProvider = "AlleleClippingTestProvider") public void TestAlleleClipping(AlleleClippingTestProvider cfg) { - int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1); + int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false, 1); Assert.assertEquals(result, cfg.expectedClip); } } \ No newline at end of file