More allele trimming for VariantAnnotator
This commit is contained in:
parent
9c2dd29bd0
commit
9ff827c83a
|
|
@ -344,6 +344,17 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
|
executeTest("Testing InbreedingCoeff annotation with PED file", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Integration test for allele trimming in VariantAnnotator.
// Runs VariantAnnotator with the InbreedingCoeff annotation on alleleTrim.vcf.gz, restricted to
// interval 1:26608870-26608875, with exacAlleleTrim.vcf.gz bound as the "exac" resource and the
// exac.AC_Adj expression requested via -E; the spec is then handed to executeTest.
// NOTE(review): the MD5 and the count 1 passed to WalkerTestSpec are presumably the expected
// output checksum and the number of %s outputs -- confirm against WalkerTestSpec.
@Test(enabled = true)
|
||||||
|
public void testAlleleTrimming() {
|
||||||
|
final String MD5 = "5f4b8dcbd4ec3b773486945e5b38e7f3";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T VariantAnnotator -R " + b37KGReference + " -A InbreedingCoeff --variant:vcf " + privateTestDir + "alleleTrim.vcf.gz" +
|
||||||
|
" -L 1:26608870-26608875 -no_cmdline_in_header --resource:exac " + privateTestDir + "exacAlleleTrim.vcf.gz -E exac.AC_Adj" +
|
||||||
|
" -o %s", 1,
|
||||||
|
Arrays.asList(MD5));
|
||||||
|
executeTest("Testing allele trimming annotation", spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testStrandBiasBySample() throws IOException {
|
public void testStrandBiasBySample() throws IOException {
|
||||||
// pipeline 1: create variant via HaplotypeCaller with no default annotations
|
// pipeline 1: create variant via HaplotypeCaller with no default annotations
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||||
|
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -318,12 +319,10 @@ public class VariantAnnotatorEngine {
|
||||||
final boolean isMultiAllelic = expressionVC.getNAlleles() > 2;
|
final boolean isMultiAllelic = expressionVC.getNAlleles() > 2;
|
||||||
final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
|
final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
|
||||||
final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
|
final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
|
||||||
|
List<Allele> usedExpressionAlleles = null;
|
||||||
|
|
||||||
// Multiallelic and count of A or R
|
// Multiallelic and count of A or R
|
||||||
if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){
|
if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){
|
||||||
// get the alleles common to the expression and variant
|
|
||||||
final List<Allele> usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
|
|
||||||
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
|
|
||||||
|
|
||||||
// remove brackets and spaces from expression attribute
|
// remove brackets and spaces from expression attribute
|
||||||
final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", "");
|
final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", "");
|
||||||
|
|
@ -332,18 +331,58 @@ public class VariantAnnotatorEngine {
|
||||||
final Map<String, String> mapAlleleToExpressionValue = new HashMap<String, String>();
|
final Map<String, String> mapAlleleToExpressionValue = new HashMap<String, String>();
|
||||||
|
|
||||||
// get comma separated expression values
|
// get comma separated expression values
|
||||||
final String [] expressionValues = cleanedExpression.split(",");
|
ArrayList<String> expressionValuesList = new ArrayList<String>(Arrays.asList(cleanedExpression.split(",")));
|
||||||
|
|
||||||
|
if ( vc.isSNP() && expressionVC.isMixed() ){
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder(expressionVC);
|
||||||
|
List<Allele> sameLengthAlleles = new ArrayList<Allele>();
|
||||||
|
|
||||||
|
// get alt alleles that are the same length as the ref allele
|
||||||
|
Iterator<String> expressionValuesIterator = expressionValuesList.iterator();
|
||||||
|
for ( Allele allele : expressionVC.getAlleles() ){
|
||||||
|
if ( allele.isNonReference() ){
|
||||||
|
if ( !expressionValuesIterator.hasNext() ){
|
||||||
|
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for expression allele): " + allele);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
expressionValuesIterator.next();
|
||||||
|
if ( allele.length() == expressionVC.getReference().length() ) {
|
||||||
|
sameLengthAlleles.add(allele);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// remove unused expression values
|
||||||
|
expressionValuesIterator.remove();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ( useRefAndAltAlleles )
|
||||||
|
expressionValuesIterator.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sameLengthAlleles.isEmpty()) {
|
||||||
|
sameLengthAlleles.add(0, expressionVC.getReference());
|
||||||
|
VariantContext variantContext = builder.alleles(sameLengthAlleles).make();
|
||||||
|
// extract the SNPs
|
||||||
|
VariantContext variantContextTrimmed = GATKVariantContextUtils.trimAlleles(variantContext, true, true);
|
||||||
|
usedExpressionAlleles = useRefAndAltAlleles ? variantContextTrimmed.getAlleles() : variantContextTrimmed.getAlternateAlleles();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// get the alleles common to the expression and variant
|
||||||
|
usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
|
||||||
|
|
||||||
// the number of expression values must be the same as the number of alleles
|
// the number of expression values must be the same as the number of alleles
|
||||||
if ( expressionValues.length != usedExpressionAlleles.size() ) {
|
if ( expressionValuesList.size() != usedExpressionAlleles.size() ) {
|
||||||
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " +
|
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " +
|
||||||
expressionValues.length + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
|
expressionValuesList.size() + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// map each used expression allele to its value
|
// map each used expression allele to its value
|
||||||
for (int i = 0; i != expressionValues.length; i++)
|
for (int i = 0; i != expressionValuesList.size(); i++)
|
||||||
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValues[i]);
|
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValuesList.get(i));
|
||||||
|
|
||||||
// add the variants expression values to the annotation
|
// add the variants expression values to the annotation
|
||||||
final List<String> annotationValues = new ArrayList<String>();
|
final List<String> annotationValues = new ArrayList<String>();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue