Merge pull request #785 from broadinstitute/rhl_variant_multiallelic_annotations
Rhl variant multiallelic annotations
This commit is contained in:
commit
071389b507
|
|
@ -212,6 +212,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpressionMultiAllele() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations-multiAllele.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty-multiAllele.vcf -E foo.AF -E foo.AC -L " + privateTestDir + "vcfexample3empty-multiAllele.vcf", 1,
|
||||
Arrays.asList("24d56a0fdb4b99b8b2ff193b6ebdc77c"));
|
||||
executeTest("using expression with multi-alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
for ( final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) {
|
||||
for ( final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName) ) {
|
||||
if ( isUniqueHeaderLine(line, hInfo) )
|
||||
hInfo.add(line);
|
||||
}
|
||||
|
|
@ -256,6 +256,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
}
|
||||
}
|
||||
|
||||
engine.makeHeaderInfoMap(hInfo);
|
||||
engine.invokeAnnotationInitializationMethods(hInfo);
|
||||
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
|
|
@ -293,8 +294,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
// get the variant contexts for all the variants at the location
|
||||
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
if ( VCs.size() == 0 )
|
||||
if ( VCs.isEmpty() )
|
||||
return 0;
|
||||
|
||||
Collection<VariantContext> annotatedVCs = VCs;
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.apache.commons.collections.ListUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
|
|
@ -44,6 +46,7 @@ import java.util.*;
|
|||
|
||||
|
||||
public class VariantAnnotatorEngine {
|
||||
private final static Logger logger = Logger.getLogger(VariantAnnotatorEngine.class);
|
||||
private List<InfoFieldAnnotation> requestedInfoAnnotations = Collections.emptyList();
|
||||
private List<GenotypeAnnotation> requestedGenotypeAnnotations = Collections.emptyList();
|
||||
private List<VAExpression> requestedExpressions = new ArrayList<>();
|
||||
|
|
@ -53,12 +56,15 @@ public class VariantAnnotatorEngine {
|
|||
|
||||
VariantOverlapAnnotator variantOverlapAnnotator = null;
|
||||
|
||||
// Map of info field name to info field
|
||||
private final Map<String, VCFInfoHeaderLine> hInfoMap = new HashMap<>();
|
||||
|
||||
protected static class VAExpression {
|
||||
|
||||
public String fullName, fieldName;
|
||||
public RodBinding<VariantContext> binding;
|
||||
|
||||
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings) {
|
||||
public VAExpression(String fullExpression, List<RodBinding<VariantContext>> bindings){
|
||||
final int indexOfDot = fullExpression.lastIndexOf(".");
|
||||
if ( indexOfDot == -1 )
|
||||
throw new UserException.BadArgumentValue(fullExpression, "it should be in rodname.value format");
|
||||
|
|
@ -94,6 +100,13 @@ public class VariantAnnotatorEngine {
|
|||
initializeDBs(toolkit);
|
||||
}
|
||||
|
||||
public void makeHeaderInfoMap(final Set<VCFHeaderLine> hInfo ){
|
||||
for ( VCFHeaderLine hLine : hInfo ) {
|
||||
if ( hLine instanceof VCFInfoHeaderLine )
|
||||
hInfoMap.put( ((VCFInfoHeaderLine)hLine).getID(), (VCFInfoHeaderLine)hLine);
|
||||
}
|
||||
}
|
||||
|
||||
// select specific expressions to use
|
||||
public void initializeExpressions(Set<String> expressionsToUse) {
|
||||
// set up the expressions
|
||||
|
|
@ -111,7 +124,7 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
private void excludeAnnotations(List<String> annotationsToExclude) {
|
||||
if ( annotationsToExclude.size() == 0 )
|
||||
if ( annotationsToExclude.isEmpty() )
|
||||
return;
|
||||
|
||||
final List<InfoFieldAnnotation> tempRequestedInfoAnnotations = new ArrayList<>(requestedInfoAnnotations.size());
|
||||
|
|
@ -186,7 +199,7 @@ public class VariantAnnotatorEngine {
|
|||
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
||||
|
||||
// annotate expressions where available
|
||||
annotateExpressions(tracker, ref.getLocus(), infoAnnotations);
|
||||
annotateExpressions(tracker, ref.getLocus(), vc, infoAnnotations);
|
||||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( final InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
|
|
@ -210,6 +223,7 @@ public class VariantAnnotatorEngine {
|
|||
final VariantContext vc) {
|
||||
//TODO we transform the read-likelihood into the Map^2 previous version for the sake of not changing of not changing annotation interface.
|
||||
//TODO should we change those interfaces?
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> annotationLikelihoods = readLikelihoods.toPerReadAlleleLikelihoodMap();
|
||||
return annotateContextForActiveRegion(tracker, annotationLikelihoods, vc);
|
||||
}
|
||||
|
|
@ -253,28 +267,96 @@ public class VariantAnnotatorEngine {
|
|||
return variantOverlapAnnotator.annotateOverlaps(tracker, variantOverlapAnnotator.annotateRsID(tracker, vc));
|
||||
}
|
||||
|
||||
private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final Map<String, Object> infoAnnotations) {
|
||||
/**
|
||||
* Annotate the requested expressions
|
||||
*
|
||||
* @param tracker ref meta data tracker (cannot be null)
|
||||
* @param loc the location on the genome
|
||||
* @param vc variant context to annotate
|
||||
* @param infoAnnotations the annotations for the requested expressions
|
||||
*/
|
||||
@Requires({"tracker != null && loc != null && vc != null"})
|
||||
private void annotateExpressions(final RefMetaDataTracker tracker, final GenomeLoc loc, final VariantContext vc, final Map<String, Object> infoAnnotations){
|
||||
|
||||
// each requested expression
|
||||
for ( final VAExpression expression : requestedExpressions ) {
|
||||
final Collection<VariantContext> VCs = tracker.getValues(expression.binding, loc);
|
||||
if ( VCs.size() == 0 )
|
||||
|
||||
// get the variant contexts for all the expressions at the location
|
||||
final Collection<VariantContext> expressionVCs = tracker.getValues(expression.binding, loc);
|
||||
if ( expressionVCs.isEmpty() )
|
||||
continue;
|
||||
|
||||
final VariantContext vc = VCs.iterator().next();
|
||||
// get the expression's variant context
|
||||
final VariantContext expressionVC = expressionVCs.iterator().next();
|
||||
|
||||
// special-case the ID field
|
||||
if ( expression.fieldName.equals("ID") ) {
|
||||
if ( vc.hasID() )
|
||||
infoAnnotations.put(expression.fullName, vc.getID());
|
||||
if ( expressionVC.hasID() )
|
||||
infoAnnotations.put(expression.fullName, expressionVC.getID());
|
||||
} else if (expression.fieldName.equals("ALT")) {
|
||||
infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString());
|
||||
infoAnnotations.put(expression.fullName, expressionVC.getAlternateAllele(0).getDisplayString());
|
||||
} else if ( expressionVC.hasAttribute(expression.fieldName) ) {
|
||||
// find the info field
|
||||
final VCFInfoHeaderLine hInfo = hInfoMap.get(expression.fullName);
|
||||
if ( hInfo == null ){
|
||||
throw new UserException("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s) " + vc.getAlleles() + ", missing header info");
|
||||
}
|
||||
|
||||
} else if ( vc.hasAttribute(expression.fieldName) ) {
|
||||
infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
|
||||
// can not annotate if more variant than expression alleles
|
||||
if ( expressionVC.getNAlleles() < vc.getNAlleles() ) {
|
||||
logger.warn("Skipping expression " + expression.fullName + " at " + loc + ", can not match " + expressionVC.getNAlleles() + " in the expression to " +
|
||||
vc.getNAlleles() + " in the variant");
|
||||
continue;
|
||||
}
|
||||
|
||||
//
|
||||
// Add the info field annotations
|
||||
//
|
||||
|
||||
final boolean isMultiAllelic = expressionVC.getNAlleles() > 2;
|
||||
final boolean useRefAndAltAlleles = VCFHeaderLineCount.R == hInfo.getCountType();
|
||||
final boolean useAltAlleles = VCFHeaderLineCount.A == hInfo.getCountType();
|
||||
|
||||
// Multiallelic and count of A or R
|
||||
if ( isMultiAllelic && (useAltAlleles || useRefAndAltAlleles) ){
|
||||
// get the alleles common to the expression and variant
|
||||
final List<Allele> usedExpressionAlleles = useRefAndAltAlleles ? expressionVC.getAlleles() : expressionVC.getAlternateAlleles();
|
||||
final List<Allele> commonAlleles = ListUtils.intersection(usedExpressionAlleles, vc.getAlleles());
|
||||
|
||||
// remove brackets and spaces from expression attribute
|
||||
final String cleanedExpression = expressionVC.getAttribute(expression.fieldName).toString().replaceAll("[\\[\\]\\s]", "");
|
||||
|
||||
// map where key = expression allele string value = expression value corresponding to the allele
|
||||
final Map<String, String> mapAlleleToExpressionValue = new HashMap<String, String>();
|
||||
|
||||
// get comma separated expression values
|
||||
final String [] expressionValues = cleanedExpression.split(",");
|
||||
|
||||
// the number of expression values must be the same as the number of alleles
|
||||
if ( expressionValues.length != usedExpressionAlleles.size() ) {
|
||||
logger.warn("Cannot annotate expression " + expression.fullName + " at " + loc + " for variant allele(s): " + vc.getAlleles() + ", " +
|
||||
expressionValues.length + " expression values is not equal to " + usedExpressionAlleles.size() + " expression alleles");
|
||||
continue;
|
||||
}
|
||||
|
||||
// map the used expression alleles to it's value
|
||||
for (int i = 0; i != expressionValues.length; i++)
|
||||
mapAlleleToExpressionValue.put(usedExpressionAlleles.get(i).getBaseString(), expressionValues[i]);
|
||||
|
||||
// add the variants expression values to the annotation
|
||||
final List<String> annotationValues = new ArrayList<String>();
|
||||
for (final Allele commonAllele : commonAlleles) {
|
||||
annotationValues.add(mapAlleleToExpressionValue.get(commonAllele.getBaseString()));
|
||||
}
|
||||
|
||||
infoAnnotations.put(expression.fullName, annotationValues);
|
||||
} else {
|
||||
infoAnnotations.put(expression.fullName, expressionVC.getAttribute(expression.fieldName));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private GenotypesContext annotateGenotypes(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
|
|
|
|||
Loading…
Reference in New Issue