Fix mmultiallelic info field annotation for VariantAnnotator
Add multi-allele test for info field annotations Fix to process all types of INFO annotations roll back to previous version, removes INFO and FORMAT Correct @return for VariantAnnotatorEngine.getNonReferenceAlleles() Enhance comments and clean up multi-allelic logic, handle header info number = R only parse counts of A & R Add INFO for AC update MD5 Performance enhancement, only parse multiallelic with a count A or R Make argument final in getNonReferenceAlleles() Code cleanup, add exceptions for bad expression/allele size mismatch and missing header info for an expression Change exception to warning for expression value/number of alleles check remove adevertised exceptions
This commit is contained in:
parent
3deaa4832c
commit
08790e1dab
|
|
@ -212,6 +212,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
executeTest("using expression", spec);
|
executeTest("using expression", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUsingExpressionMultiAllele() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations-multiAllele.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty-multiAllele.vcf -E foo.AF -E foo.AC -L " + privateTestDir + "vcfexample3empty-multiAllele.vcf", 1,
|
||||||
|
Arrays.asList("24d56a0fdb4b99b8b2ff193b6ebdc77c"));
|
||||||
|
executeTest("using expression with multi-alleles", spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUsingExpressionWithID() {
|
public void testUsingExpressionWithID() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
|
||||||
|
|
@ -224,7 +224,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||||
for ( final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) {
|
for ( final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName) ) {
|
||||||
if ( isUniqueHeaderLine(line, hInfo) )
|
if ( isUniqueHeaderLine(line, hInfo) )
|
||||||
hInfo.add(line);
|
hInfo.add(line);
|
||||||
}
|
}
|
||||||
|
|
@ -256,6 +256,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
engine.makeHeaderInfoMap(hInfo);
|
||||||
engine.invokeAnnotationInitializationMethods(hInfo);
|
engine.invokeAnnotationInitializationMethods(hInfo);
|
||||||
|
|
||||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
|
|
@ -293,8 +294,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
// get the variant contexts for all the variants at the location
|
||||||
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
|
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||||
if ( VCs.size() == 0 )
|
if ( VCs.isEmpty() )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
Collection<VariantContext> annotatedVCs = VCs;
|
Collection<VariantContext> annotatedVCs = VCs;
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,8 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
import htsjdk.variant.vcf.*;
|
import htsjdk.variant.vcf.*;
|
||||||
|
import org.apache.commons.collections.ListUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||||
|
|
@ -44,6 +46,7 @@ import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class VariantAnnotatorEngine {
|
public class VariantAnnotatorEngine {
|
||||||
|
private final static Logger logger = Logger.getLogger(VariantAnnotatorEngine.class);
|
||||||
private List<InfoFieldAnnotation> requestedInfoAnnotations = Collections.emptyList();
|
private List<InfoFieldAnnotation> requestedInfoAnnotations = Collections.emptyList();
|
||||||
private List<GenotypeAnnotation> requestedGenotypeAnnotations = Collections.emptyList();
|
private List<GenotypeAnnotation> requestedGenotypeAnnotations = Collections.emptyList();
|
||||||
private List<VAExpression> requestedExpressions = new ArrayList<>();
|
private List<VAExpression> requestedExpressions = new ArrayList<>();
|
||||||
|
|
@ -53,12 +56,15 @@ public class VariantAnnotatorEngine {
|
||||||
|
|
||||||
VariantOverlapAnnotator variantOverlapAnnotator = null;
|
VariantOverlapAnnotator variantOverlapAnnotator = null;
|
||||||
|
|
||||||
|
// Map of info field name to info field
|
||||||
|
private final Map<String, VCFInfoHeaderLine> hInfoMap = new HashMap<>();
|
||||||
|
|
||||||
protected static class VAExpression {
|
protected static class VAExpression {
|
||||||
|
|
||||||
public String fullName, fieldName;
|
public String fullName, fieldName;
|
||||||
public RodBinding<VariantContext> binding;
|
public RodBinding<VariantContext> binding;
|
||||||
|
|
||||||
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings) {
|
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings){
|
||||||
final int indexOfDot = fullExpression.lastIndexOf(".");
|
final int indexOfDot = fullExpression.lastIndexOf(".");
|
||||||
if ( indexOfDot == -1 )
|
if ( indexOfDot == -1 )
|
||||||
throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
|
throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
|
||||||
|
|
@ -94,6 +100,13 @@ public class VariantAnnotatorEngine {
|
||||||
initializeDBs(toolkit);
|
initializeDBs(toolkit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void makeHeaderInfoMap(final Set<VCFHeaderLine> hInfo ){
|
||||||
|
for ( VCFHeaderLine hLine : hInfo ) {
|
||||||
|
if ( hLine instanceof VCFInfoHeaderLine )
|
||||||
|
hInfoMap.put( ((VCFInfoHeaderLine)hLine).getID(), (VCFInfoHeaderLine)hLine);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// select specific expressions to use
|
// select specific expressions to use
|
||||||
public void initializeExpressions(Set<String> expressionsToUse) {
|
public void initializeExpressions(Set<String> expressionsToUse) {
|
||||||
// set up the expressions
|
// set up the expressions
|
||||||
|
|
@ -111,7 +124,7 @@ public class VariantAnnotatorEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void excludeAnnotations(List<String> annotationsToExclude) {
|
private void excludeAnnotations(List<String> annotationsToExclude) {
|
||||||
if ( annotationsToExclude.size() == 0 )
|
if ( annotationsToExclude.isEmpty() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
final List<InfoFieldAnnotation> tempRequestedInfoAnnotations = new ArrayList<>(requestedInfoAnnotations.size());
|
final List<InfoFieldAnnotation> tempRequestedInfoAnnotations = new ArrayList<>(requestedInfoAnnotations.size());
|
||||||
|
|
@ -186,7 +199,7 @@ public class VariantAnnotatorEngine {
|
||||||
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
||||||
|
|
||||||
// annotate expressions where available
|
// annotate expressions where available
|
||||||
annotateExpressions(tracker, ref.getLocus(), infoAnnotations);
|
annotateExpressions(tracker, ref.getLocus(), vc, infoAnnotations);
|
||||||
|
|
||||||
// go through all the requested info annotationTypes
|
// go through all the requested info annotationTypes
|
||||||
for ( final InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
for ( final InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||||
|
|
@ -210,6 +223,7 @@ public class VariantAnnotatorEngine {
|
||||||
final VariantContext vc) {
|
final VariantContext vc) {
|
||||||
//TODO we transform the read-likelihood into the Map^2 previous version for the sake of not changing of not changing annotation interface.
|
//TODO we transform the read-likelihood into the Map^2 previous version for the sake of not changing of not changing annotation interface.
|
||||||
//TODO should we change those interfaces?
|
//TODO should we change those interfaces?
|
||||||
|
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> annotationLikelihoods = readLikelihoods.toPerReadAlleleLikelihoodMap();
|
final Map<String, PerReadAlleleLikelihoodMap> annotationLikelihoods = readLikelihoods.toPerReadAlleleLikelihoodMap();
|
||||||
return annotateContextForActiveRegion(tracker, annotationLikelihoods, vc);
|
return annotateContextForActiveRegion(tracker, annotationLikelihoods, vc);
|
||||||
}
|
}
|
||||||
|
|
@ -253,28 +267,96 @@ public class VariantAnnotatorEngine {
|
||||||
return variantOverlapAnnotator.annotateOverlaps(tracker, variantOverlapAnnotator.annotateRsID(tracker, vc));
|
return variantOverlapAnnotator.annotateOverlaps(tracker, variantOverlapAnnotator.annotateRsID(tracker, vc));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final Map<String, Object> infoAnnotations) {
|
/**
|
||||||
|
* Annotate the requested expressions
|
||||||
|
*
|
||||||
|
* @param tracker ref meta data tracker (cannot be null)
|
||||||
|
* @param loc the location on the genome
|
||||||
|
* @param vc variant context to annotate
|
||||||
|
* @param infoAnnotations the annotations for the requested expressions
|
||||||
|
*/
|
||||||
|
@Requires({"tracker != null && loc != null && vc != null"})
|
||||||
|
private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final VariantContext vc, final Map<String, Object> infoAnnotations){
|
||||||
|
|
||||||
|
// each requested expression
|
||||||
for ( final VAExpression expression : requestedExpressions ) {
|
for ( final VAExpression expression : requestedExpressions ) {
|
||||||
final Collection<VariantContext> VCs = tracker.getValues(expression.binding, loc);
|
|
||||||
if ( VCs.size() == 0 )
|
// get the variant contexts for all the expressions at the location
|
||||||
|
final Collection<VariantContext> expressionVCs = tracker.getValues(expression.binding, loc);
|
||||||
|
if ( expressionVCs.isEmpty() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final VariantContext vc = VCs.iterator().next();
|
// get the expression's variant context
|
||||||
|
final VariantContext expressionVC = expressionVCs.iterator().next();
|
||||||
|
|
||||||
// special-case the ID field
|
// special-case the ID field
|
||||||
if ( expression.fieldName.equals("ID") ) {
|
if ( expression.fieldName.equals("ID") ) {
|
||||||
if ( vc.hasID() )
|
if ( expressionVC.hasID() )
|
||||||
infoAnnotations.put(expression.fullName, vc.getID());
|
infoAnnotations.put(expression.fullName, expressionVC.getID());
|
||||||
} else if (expression.fieldName.equals("ALT")) {
|
} else if (expression.fieldName.equals("ALT")) {
|
||||||
infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString());
|
infoAnnotations.put(expression.fullName, expressionVC.getAlternateAllele(0).getDisplayString());
|
||||||
|
} else if ( expressionVC.hasAttribute(expression.fieldName) ) {
|
||||||
|
// find the info field
|
||||||
|
final VCFInfoHeaderLine hInfo = hInfoMap.get(expression.fullName);
|
||||||
|
if ( hInfo == null ){
|
||||||
|
throw new UserException("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s) " + vc.getAlleles() + ", missing header info");
|
||||||
|
}
|
||||||
|
|
||||||
} else if ( vc.hasAttribute(expression.fieldName) ) {
|
// can not annotate if more variant than expression alleles
|
||||||
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
if ( expressionVC.getNAlleles() < vc.getNAlleles() ) {
|
||||||
|
logger.warn("Skipping expression " + expression.fullName + " at " + loc + ", can not match " + expressionVC.getNAlleles() + " in the expression to " +
|
||||||
|
vc.getNAlleles() + " in the variant");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Add the info field annotations
|
||||||
|
//
|
||||||
|
|
||||||
|
final boolean isMultiAllelic = expressionVC.getNAlleles() > 2;
|
||||||
|
final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
|
||||||
|
final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
|
||||||
|
|
||||||
|
// Multiallelic and count of A or R
|
||||||
|
if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){
|
||||||
|
// get the alleles common to the expression and variant
|
||||||
|
final List<Allele> usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
|
||||||
|
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
|
||||||
|
|
||||||
|
// remove brackets and spaces from expression attribute
|
||||||
|
final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", "");
|
||||||
|
|
||||||
|
// map where key = expression allele string value = expression value corresponding to the allele
|
||||||
|
final Map<String, String> mapAlleleToExpressionValue = new HashMap<String, String>();
|
||||||
|
|
||||||
|
// get comma separated expression values
|
||||||
|
final String [] expressionValues = cleanedExpression.split(",");
|
||||||
|
|
||||||
|
// the number of expression values must be the same as the number of alleles
|
||||||
|
if ( expressionValues.length != usedExpressionAlleles.size() ) {
|
||||||
|
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " +
|
||||||
|
expressionValues.length + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// map the used expression alleles to it's value
|
||||||
|
for (int i = 0; i != expressionValues.length; i++)
|
||||||
|
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValues[i]);
|
||||||
|
|
||||||
|
// add the variants expression values to the annotation
|
||||||
|
final List<String> annotationValues = new ArrayList<String>();
|
||||||
|
for (final Allele commonAllele : commonAlleles) {
|
||||||
|
annotationValues.add(mapAlleleToExpressionValue.get(commonAllele.getBaseString()));
|
||||||
|
}
|
||||||
|
|
||||||
|
infoAnnotations.put(expression.fullName, annotationValues);
|
||||||
|
} else {
|
||||||
|
infoAnnotations.put(expression.fullName, expressionVC.getAttribute(expression.fieldName));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private GenotypesContext annotateGenotypes(final RefMetaDataTracker tracker,
|
private GenotypesContext annotateGenotypes(final RefMetaDataTracker tracker,
|
||||||
final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts,
|
final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
final VariantContext vc,
|
final VariantContext vc,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue