More allele trimming for VariantAnnotator

This commit is contained in:
Ron Levine 2015-04-29 20:28:28 -04:00
parent 9c2dd29bd0
commit 9ff827c83a
2 changed files with 58 additions and 8 deletions

View File

@ -344,6 +344,17 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
}
@Test(enabled = true)
public void testAlleleTrimming() {
final String MD5 = "5f4b8dcbd4ec3b773486945e5b38e7f3";
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "alleleTrim.vcf.gz" +
" -L 1:26608870-26608875 -no_cmdline_in_header --resource:exac " + privateTestDir + "exacAlleleTrim.vcf.gz -E exac.AC_Adj" +
" -o %s", 1,
Arrays.asList(MD5));
executeTest("Testing allele trimming annotation", spec);
}
@Test
public void testStrandBiasBySample() throws IOException {
// pipeline 1: create variant via HalotypeCaller with no default annotations

View File

@ -41,6 +41,7 @@ import org.broadinstitute.gatk.utils.commandline.RodBinding;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -318,12 +319,10 @@ public class VariantAnnotatorEngine {
final boolean isMultiAllelic = expressionVC.getNAlleles() > 2;
final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
List<Allele> usedExpressionAlleles = null;
// Multiallelic and count of A or R
if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){
// get the alleles common to the expression and variant
final List<Allele> usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
// remove brackets and spaces from expression attribute
final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", "");
@ -332,18 +331,58 @@ public class VariantAnnotatorEngine {
final Map<String, String> mapAlleleToExpressionValue = new HashMap<String, String>();
// get comma separated expression values
final String [] expressionValues = cleanedExpression.split(",");
ArrayList<String> expressionValuesList = new ArrayList<String>(Arrays.asList(cleanedExpression.split(",")));
if ( vc.isSNP() && expressionVC.isMixed() ){
final VariantContextBuilder builder = new VariantContextBuilder(expressionVC);
List<Allele> sameLengthAlleles = new ArrayList<Allele>();
// get alt alleles that are the same length as the ref allele
Iterator<String> expressionValuesIterator = expressionValuesList.iterator();
for ( Allele allele : expressionVC.getAlleles() ){
if ( allele.isNonReference() ){
if ( !expressionValuesIterator.hasNext() ){
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for expression allele): " + allele);
break;
}
expressionValuesIterator.next();
if ( allele.length() == expressionVC.getReference().length() ) {
sameLengthAlleles.add(allele);
}
else {
// remove unused expression values
expressionValuesIterator.remove();
}
} else {
if ( useRefAndAltAlleles )
expressionValuesIterator.remove();
}
}
if (!sameLengthAlleles.isEmpty()) {
sameLengthAlleles.add(0, expressionVC.getReference());
VariantContext variantContext = builder.alleles(sameLengthAlleles).make();
// extract the SNPs
VariantContext variantContextTrimmed = GATKVariantContextUtils.trimAlleles(variantContext, true, true);
usedExpressionAlleles = useRefAndAltAlleles ? variantContextTrimmed.getAlleles() : variantContextTrimmed.getAlternateAlleles();
}
} else {
// get the alleles common to the expression and variant
usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
}
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
// the number of expression values must be the same as the number of alleles
if ( expressionValues.length != usedExpressionAlleles.size() ) {
if ( expressionValuesList.size() != usedExpressionAlleles.size() ) {
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " +
expressionValues.length + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
expressionValuesList.size() + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
continue;
}
// map the used expression alleles to it's value
for (int i = 0; i != expressionValues.length; i++)
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValues[i]);
for (int i = 0; i != expressionValuesList.size(); i++)
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValuesList.get(i));
// add the variants expression values to the annotation
final List<String> annotationValues = new ArrayList<String>();