More allele trimming for VariantAnnotator
This commit is contained in:
parent
9c2dd29bd0
commit
9ff827c83a
|
|
@ -344,6 +344,17 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testAlleleTrimming() {
|
||||
final String MD5 = "5f4b8dcbd4ec3b773486945e5b38e7f3";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "alleleTrim.vcf.gz" +
|
||||
" -L 1:26608870-26608875 -no_cmdline_in_header --resource:exac " + privateTestDir + "exacAlleleTrim.vcf.gz -E exac.AC_Adj" +
|
||||
" -o %s", 1,
|
||||
Arrays.asList(MD5));
|
||||
executeTest("Testing allele trimming annotation", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStrandBiasBySample() throws IOException {
|
||||
// pipeline 1: create variant via HalotypeCaller with no default annotations
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
|||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -318,12 +319,10 @@ public class VariantAnnotatorEngine {
|
|||
final boolean isMultiAllelic = expressionVC.getNAlleles() > 2;
|
||||
final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
|
||||
final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
|
||||
List<Allele> usedExpressionAlleles = null;
|
||||
|
||||
// Multiallelic and count of A or R
|
||||
if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){
|
||||
// get the alleles common to the expression and variant
|
||||
final List<Allele> usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
|
||||
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
|
||||
|
||||
// remove brackets and spaces from expression attribute
|
||||
final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", "");
|
||||
|
|
@ -332,18 +331,58 @@ public class VariantAnnotatorEngine {
|
|||
final Map<String, String> mapAlleleToExpressionValue = new HashMap<String, String>();
|
||||
|
||||
// get comma separated expression values
|
||||
final String [] expressionValues = cleanedExpression.split(",");
|
||||
ArrayList<String> expressionValuesList = new ArrayList<String>(Arrays.asList(cleanedExpression.split(",")));
|
||||
|
||||
if ( vc.isSNP() && expressionVC.isMixed() ){
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(expressionVC);
|
||||
List<Allele> sameLengthAlleles = new ArrayList<Allele>();
|
||||
|
||||
// get alt alleles that are the same length as the ref allele
|
||||
Iterator<String> expressionValuesIterator = expressionValuesList.iterator();
|
||||
for ( Allele allele : expressionVC.getAlleles() ){
|
||||
if ( allele.isNonReference() ){
|
||||
if ( !expressionValuesIterator.hasNext() ){
|
||||
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for expression allele): " + allele);
|
||||
break;
|
||||
}
|
||||
expressionValuesIterator.next();
|
||||
if ( allele.length() == expressionVC.getReference().length() ) {
|
||||
sameLengthAlleles.add(allele);
|
||||
}
|
||||
else {
|
||||
// remove unused expression values
|
||||
expressionValuesIterator.remove();
|
||||
}
|
||||
} else {
|
||||
if ( useRefAndAltAlleles )
|
||||
expressionValuesIterator.remove();
|
||||
}
|
||||
}
|
||||
|
||||
if (!sameLengthAlleles.isEmpty()) {
|
||||
sameLengthAlleles.add(0, expressionVC.getReference());
|
||||
VariantContext variantContext = builder.alleles(sameLengthAlleles).make();
|
||||
// extract the SNPs
|
||||
VariantContext variantContextTrimmed = GATKVariantContextUtils.trimAlleles(variantContext, true, true);
|
||||
usedExpressionAlleles = useRefAndAltAlleles ? variantContextTrimmed.getAlleles() : variantContextTrimmed.getAlternateAlleles();
|
||||
}
|
||||
} else {
|
||||
// get the alleles common to the expression and variant
|
||||
usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
|
||||
}
|
||||
|
||||
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
|
||||
|
||||
// the number of expression values must be the same as the number of alleles
|
||||
if ( expressionValues.length != usedExpressionAlleles.size() ) {
|
||||
if ( expressionValuesList.size() != usedExpressionAlleles.size() ) {
|
||||
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " +
|
||||
expressionValues.length + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
|
||||
expressionValuesList.size() + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
|
||||
continue;
|
||||
}
|
||||
|
||||
// map the used expression alleles to it's value
|
||||
for (int i = 0; i != expressionValues.length; i++)
|
||||
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValues[i]);
|
||||
for (int i = 0; i != expressionValuesList.size(); i++)
|
||||
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValuesList.get(i));
|
||||
|
||||
// add the variants expression values to the annotation
|
||||
final List<String> annotationValues = new ArrayList<String>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue