diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java index 6562c38f4..78c96418d 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -344,6 +344,17 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing InbreedingCoeff annotation with PED file", spec); } + @Test(enabled = true) + public void testAlleleTrimming() { + final String MD5 = "5f4b8dcbd4ec3b773486945e5b38e7f3"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "alleleTrim.vcf.gz" + + " -L 1:26608870-26608875 -no_cmdline_in_header --resource:exac " + privateTestDir + "exacAlleleTrim.vcf.gz -E exac.AC_Adj" + + " -o %s", 1, + Arrays.asList(MD5)); + executeTest("Testing allele trimming annotation", spec); + } + @Test public void testStrandBiasBySample() throws IOException { // pipeline 1: create variant via HalotypeCaller with no default annotations diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorEngine.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorEngine.java index c9c877fed..f064bd4de 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorEngine.java @@ -41,6 +41,7 @@ import org.broadinstitute.gatk.utils.commandline.RodBinding; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -318,12 +319,10 @@ public class VariantAnnotatorEngine { final boolean isMultiAllelic = expressionVC.getNAlleles() > 2; final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType(); final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType(); + List usedExpressionAlleles = null; // Multiallelic and count of A or R if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){ - // get the alleles common to the expression and variant - final List usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles(); - final List commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles()); // remove brackets and spaces from expression attribute final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", ""); @@ -332,18 +331,58 @@ public class VariantAnnotatorEngine { final Map mapAlleleToExpressionValue = new HashMap(); // get comma separated expression values - final String [] expressionValues = cleanedExpression.split(","); + ArrayList expressionValuesList = new ArrayList(Arrays.asList(cleanedExpression.split(","))); + + if ( vc.isSNP() && expressionVC.isMixed() ){ + final VariantContextBuilder builder = new VariantContextBuilder(expressionVC); + List sameLengthAlleles = new ArrayList(); + + // get alt alleles that are the same length as the ref allele + Iterator expressionValuesIterator = expressionValuesList.iterator(); + for ( Allele allele : expressionVC.getAlleles() ){ + if ( allele.isNonReference() ){ + if ( !expressionValuesIterator.hasNext() ){ + logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for expression allele): " + allele); + break; + } + expressionValuesIterator.next(); + if ( allele.length() == expressionVC.getReference().length() ) { + sameLengthAlleles.add(allele); + } + else { + // remove unused expression values + expressionValuesIterator.remove(); + } + } else { + if ( useRefAndAltAlleles ) + expressionValuesIterator.remove(); + } + } + + if (!sameLengthAlleles.isEmpty()) { + sameLengthAlleles.add(0, expressionVC.getReference()); + VariantContext variantContext = builder.alleles(sameLengthAlleles).make(); + // extract the SNPs + VariantContext variantContextTrimmed = GATKVariantContextUtils.trimAlleles(variantContext, true, true); + usedExpressionAlleles = useRefAndAltAlleles ? variantContextTrimmed.getAlleles() : variantContextTrimmed.getAlternateAlleles(); + } + } else { + // get the alleles common to the expression and variant + usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles(); + } + + final List commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles()); // the number of expression values must be the same as the number of alleles - if ( expressionValues.length != usedExpressionAlleles.size() ) { + if ( expressionValuesList.size() != usedExpressionAlleles.size() ) { logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " + - expressionValues.length + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles"); + expressionValuesList.size() + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles"); continue; } // map the used expression alleles to it's value - for (int i = 0; i != expressionValues.length; i++) - mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValues[i]); + for (int i = 0; i != expressionValuesList.size(); i++) + mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValuesList.get(i)); // add the variants expression values to the annotation final List annotationValues = new ArrayList();