First checkin of GenomicAnnotator which annotates an input VCF file by pulling data in a generic way from an arbitrary set of TabularRODs.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3114 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
weisburd 2010-04-02 17:49:42 +00:00
parent 699a0ea9d1
commit 6b7b07f178
5 changed files with 556 additions and 13 deletions

View File

@ -123,7 +123,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
*/
public Integer reduceInit() { return 0; }
/**
* We want reads that span deletions
*
@ -154,16 +154,20 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
return 0;
// if the reference base is not ambiguous, we can annotate
Collection<VariantContext> annotatedVCs = Arrays.asList( new VariantContext[] { vc } );
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
if ( stratifiedContexts != null ) {
vc = engine.annotateContext(tracker, ref, stratifiedContexts, vc);
annotatedVCs = engine.annotateContext(tracker, ref, stratifiedContexts, vc);
}
}
if ( variant instanceof RodVCF ) {
for(VariantContext annotatedVC : annotatedVCs ) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
}
}
if ( variant instanceof RodVCF )
vcfWriter.addRecord(VariantContextAdaptors.toVCF(vc, ref.getBase()));
return 1;
}

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.playground.gatk.walkers.annotator.GenomicAnnotation;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
@ -33,6 +34,12 @@ public class VariantAnnotatorEngine {
// how about hapmap3?
private boolean annotateHapmap3 = false;
// command-line option used for GenomicAnnotation.
private Map<String, Set<String>> requestedColumnsMap;
// command-line option used for GenomicAnnotation.
private boolean explode;
// use this constructor if you want all possible annotations
public VariantAnnotatorEngine(GenomeAnalysisEngine engine) {
@ -143,7 +150,7 @@ public class VariantAnnotatorEngine {
return descriptions;
}
public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
public Collection<VariantContext> annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
Map<String, Object> infoAnnotations = new HashMap<String, Object>(vc.getAttributes());
@ -166,12 +173,50 @@ public class VariantAnnotatorEngine {
infoAnnotations.put(VCFRecord.HAPMAP3_KEY, hapmap3.size() == 0 ? "0" : "1");
}
for ( InfoFieldAnnotation annotation : requestedInfoAnnotations ) {
Map<String, Object> result = annotation.annotate(tracker, ref, stratifiedContexts, vc);
if ( result != null )
infoAnnotations.putAll(result);
//Process the info field
List<Map<String, Object>> infoAnnotationOutputsList = new LinkedList<Map<String, Object>>(); //each element in infoAnnotationOutputs corresponds to a single line in the output VCF file
infoAnnotationOutputsList.add(new HashMap<String, Object>(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file gains 1 line.
//go through all the requested info annotationTypes
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations )
{
Map<String, Object> annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc);
if ( annotationsFromCurrentType == null ) {
continue;
}
if(annotationType instanceof GenomicAnnotation)
{
//go through the annotations returned by GenericAnnotation for each -B input file.
for( Map.Entry<String, Object> annotationsFromInputFile : annotationsFromCurrentType.entrySet() )
{
final String inputFileBindingName = annotationsFromInputFile.getKey();
final List<Map<String, String>> matchingRecords = (List<Map<String, String>>) annotationsFromInputFile.getValue();
if( matchingRecords.size() > 1 && explode)
{
//More than one record matched in this file. After this, infoAnnotationOutputsList.size() will be infoAnnotationOutputsList.size()*matchingRecords.size().
infoAnnotationOutputsList = explodeInfoAnnotationOutputsList( infoAnnotationOutputsList, matchingRecords, inputFileBindingName);
}
else
{
//This doesn't change infoAnnotationOutputsList.size(). If more than one record matched, their annotations will
//all be added to the same output line, with keys disambiguated by appending _i .
addToExistingAnnotationOutputs( infoAnnotationOutputsList, matchingRecords, inputFileBindingName);
}
}
}
else
{
//add the annotations to each output line.
for(Map<String, Object> infoAnnotationOutput : infoAnnotationOutputsList) {
infoAnnotationOutput.putAll(annotationsFromCurrentType);
}
}
}
//Process genotypes
Map<String, Genotype> genotypes;
if ( requestedGenotypeAnnotations.size() == 0 ) {
genotypes = vc.getGenotypes();
@ -195,6 +240,161 @@ public class VariantAnnotatorEngine {
}
}
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), infoAnnotations);
//Create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList
Collection<VariantContext> returnValue = new LinkedList<VariantContext>();
for(Map<String, Object> infoAnnotationOutput : infoAnnotationOutputsList) {
returnValue.add( new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.getFilters(), infoAnnotationOutput) );
}
return returnValue;
}
}
/**
 * Implements non-explode mode: output lines keep a one-to-one relationship with
 * the input variants, and all records that matched in the given input file are
 * folded into every existing output line's info field. When more than one record
 * matched, each record's keys get an "_i" suffix (i is a 0-based record counter)
 * so the keys from different records don't collide.
 *
 * @param infoAnnotationOutputsList current list of output lines; mutated in place.
 * @param matchingRecords the records from one -B input file that overlap this locus.
 * @param bindingName the ROD binding name of that input file.
 */
private void addToExistingAnnotationOutputs(
    final List<Map<String, Object>> infoAnnotationOutputsList,
    final List<Map<String, String>> matchingRecords,
    final String bindingName) {

    // Key suffixes are only needed when several records matched in this file.
    final boolean disambiguateKeys = matchingRecords.size() > 1;

    int recordIndex = 0;
    for (Map<String, String> record : matchingRecords) {
        // Keep only the columns the user requested via -s (no-op if none requested).
        Map<String, String> recordAnnotations = selectColumnsFromRecord(bindingName, record);

        if (disambiguateKeys) {
            // eg. with two dbSNP matches, dbSNP.avHet becomes dbSNP.avHet_0 and dbSNP.avHet_1.
            final Map<String, String> suffixedAnnotations = new HashMap<String, String>();
            for (Map.Entry<String, String> annotation : recordAnnotations.entrySet()) {
                suffixedAnnotations.put(annotation.getKey() + "_" + recordIndex, annotation.getValue());
            }
            recordAnnotations = suffixedAnnotations;
        }

        // Fold this record's annotations into every output line.
        for (Map<String, Object> outputLine : infoAnnotationOutputsList) {
            outputLine.putAll(recordAnnotations);
        }

        recordIndex++;
    }
}
/**
 * Implements "explode" mode. Takes the current list of info-annotation outputs
 * (each element will become one line of the output VCF file) and returns a new
 * list containing one copy of every current output line for each record in
 * matchingRecords, with that record's annotations merged in. The returned list
 * therefore has size:
 *
 *   infoAnnotationOutputsList.size() * matchingRecords.size()
 *
 * See class-level comments for more details.
 *
 * @param infoAnnotationOutputsList current list of output lines; not modified.
 * @param matchingRecords the records from one -B input file that overlap this locus.
 * @param bindingName the ROD binding name of that input file.
 * @return the new, expanded list of output lines.
 */
private List<Map<String, Object>> explodeInfoAnnotationOutputsList(
    final List<Map<String, Object>> infoAnnotationOutputsList,
    final List<Map<String, String>> matchingRecords,
    final String bindingName) {

    // The new list of lines destined for the output VCF file.
    final List<Map<String, Object>> explodedOutputs = new LinkedList<Map<String, Object>>();

    for (final Map<String, String> record : matchingRecords) {
        // Keep only the columns the user requested via -s (no-op if none requested).
        final Map<String, String> recordAnnotations = selectColumnsFromRecord(bindingName, record);

        // Cross this record with every existing output line.
        for (final Map<String, Object> existingLine : infoAnnotationOutputsList) {
            final Map<String, Object> newLine = new HashMap<String, Object>(existingLine);
            newLine.putAll(recordAnnotations);
            explodedOutputs.add(newLine);
        }
    }

    return explodedOutputs;
}
/**
 * Returns a new Map containing only the columns of the given record that the user
 * requested via the -s arg. If no -s arg referenced the given bindingName, the
 * record's annotations are returned untouched.
 *
 * @param bindingName The binding name for a particular ROD input file.
 * @param annotationsForRecord The column_name -> value pairs for one record from that file.
 * @return the (possibly subsetted) annotations — see above.
 * @throws StingException if the -s selection rejects every column of the record.
 */
private Map<String, String> selectColumnsFromRecord( String bindingName, Map<String, String> annotationsForRecord) {

    // No selection registered for this file: pass everything through.
    if (requestedColumnsMap == null || !requestedColumnsMap.containsKey(bindingName)) {
        return annotationsForRecord;
    }

    final Set<String> requestedColumns = requestedColumnsMap.get(bindingName);

    final Map<String, String> filteredAnnotations = new HashMap<String, String>();
    for (final Map.Entry<String, String> entry : annotationsForRecord.entrySet()) {
        final String columnName = entry.getKey();
        if (requestedColumns.contains(columnName)) {
            filteredAnnotations.put(columnName, entry.getValue());
        }
    }

    // An -s arg that matches nothing is almost certainly a user typo — fail loudly.
    if (filteredAnnotations.isEmpty()) {
        throw new StingException("Invalid -s argument for the '" + bindingName + "' input file. " +
            "It caused all columns in the file to be rejected. Please check to make sure the -s column " +
            "names match the column names in the '" + bindingName + "' file's HEADER line.");
    }

    return filteredAnnotations;
}
/**
 * Determines how the engine will handle the case where multiple records in a ROD file
 * overlap a particular single locus. If explode is set to true, the output will be
 * one-to-many, so that each locus in the input VCF file could result in multiple
 * entries in the output VCF file. Otherwise, the output will be one-to-one, and
 * all multiple-match records will be collapsed into the single info field.
 * The collapsing is done by appending an _i to each key name (where 'i' is a
 * 0-based record counter).
 *
 * See class-level comments for more details.
 *
 * @param explode true for one-output-line-per-matching-record, false to collapse
 *                all matches into a single info field.
 */
public void setExplode(boolean explode) {
this.explode = explode;
}
/**
 * Sets the columns that will be used for the info annotation field.
 * Column names should be of the form bindingName.columnName (eg. dbsnp.avHet).
 *
 * @param columns An array of strings where each string is a comma-separated list
 * of columnNames (eg ["dbsnp.avHet,dbsnp.valid", "file2.col1,file3.col1"] ).
 * @throws IllegalArgumentException if columns is null (ie. the -s arg was not parsed).
 */
public void setRequestedColumns(String[] columns) {
    if (columns == null) {
        throw new IllegalArgumentException("columns arg is null. Please check the -s command-line arg.");
    }
    // Parsed into bindingName -> set-of-column-specifiers; consumed by selectColumnsFromRecord.
    this.requestedColumnsMap = GenomicAnnotation.parseColumnsArg(columns);
}
}

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
@ -224,7 +225,8 @@ public class UnifiedGenotyperEngine {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
call.vc = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, call.vc);
Collection<VariantContext> variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, call.vc);
call.vc = variantContexts.iterator().next(); //We know the collection will always have exactly 1 element.
}
}

View File

@ -0,0 +1,163 @@
package org.broadinstitute.sting.playground.gatk.walkers.annotator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
/**
 * An InfoFieldAnnotation that annotates each variant with data pulled generically
 * from every TabularROD bound via -B that overlaps the variant's locus. Each
 * overlapping record contributes "bindingName.columnName" -> value pairs; see
 * annotate(..) for the exact structure of the result.
 */
public class GenomicAnnotation implements InfoFieldAnnotation {
/**
 * For each ROD (aka. record) which overlaps the current locus, generates a
 * set of annotations of the form:
 *
 * thisRodName.fieldName1=fieldValue1, thisRodName.fieldName2=fieldValue2 (eg. dbSNP.avHet=0.7, dbSNP.ref_allele=A),
 *
 * These annotations are stored in a Map<String, String>.
 *
 * Since a single input file can have multiple records that overlap the current
 * locus (eg. dbSNP can have multiple entries for the same location), a different
 * Map<String, String> is created for each of these, resulting in a List<Map<String, String>>
 * for each input file.
 *
 * The return value of this method is a Map of the form:
 * rodName1 -> List<Map<String, String>>
 * rodName2 -> List<Map<String, String>>
 * rodName3 -> List<Map<String, String>>
 * ...
 *
 * The List values are guaranteed to have size > 0, and in most cases will have size == 1.
 */
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final Map<String, StratifiedAlignmentContext> stratifiedContexts,
final VariantContext vc) {
final Map<String, Object> annotations = new HashMap<String, Object>();
for(final GATKFeature gatkFeature : tracker.getAllRods())
{
final ReferenceOrderedDatum rod = (ReferenceOrderedDatum) gatkFeature.getUnderlyingObject();
if(! (rod instanceof TabularROD) ) {
continue; //GenericAnnotation only works with TabularRODs so that it can select individual columns
}
// Skip the bindings reserved for the input variants and intervals themselves.
final String name = rod.getName();
if(name.equals("variant") || name.equals("interval")) {
continue;
}
final Map<String, String> annotationsForRecord = convertRecordToAnnotations( (TabularROD) rod );
// Group records by binding name: first record creates the list, later ones append.
List<Map<String, String>> listOfMatchingRecords = (List<Map<String, String>>) annotations.get(name);
if(listOfMatchingRecords == null) {
listOfMatchingRecords = new LinkedList<Map<String,String>>();
listOfMatchingRecords.add( annotationsForRecord );
annotations.put(name, listOfMatchingRecords);
} else {
listOfMatchingRecords.add( annotationsForRecord );
}
}
return annotations;
}
/**
 * Converts the ROD to a set of key-value pairs of the form:
 * thisRodName.fieldName1=fieldValue1, thisRodName.fieldName2=fieldValue2
 * (eg. dbSNP.avHet=0.7, dbSNP.ref_allele=A)
 *
 * @param rod A TabularROD corresponding to one record in one input file.
 *
 * @return The map of column-name -> value pairs.
 */
private Map<String, String> convertRecordToAnnotations( final TabularROD rod ) {
final String rodName = rod.getName(); //aka the rod binding
final Map<String, String> result = new HashMap<String, String>();
for(final Entry<String, String> entry : rod.entrySet()) {
result.put( generateInfoFieldKey(rodName, entry.getKey()), entry.getValue());
}
return result;
}
// Builds the fully-qualified info-field key, eg. ("dbSNP", "avHet") -> "dbSNP.avHet".
public static String generateInfoFieldKey(String rodBindingName, String columnName ) {
return rodBindingName + "." + columnName;
}
/**
 * Parses the columns arg and returns a Map of column specifiers hashed by their binding name.
 * For example:
 * The command line:
 * -s dbSnp.valid,dbsnp.avHet -s refGene.txStart,refGene.txEnd
 *
 * will be passed to this method as:
 * ["dbSnp.valid,dbsnp.avHet", "refGene.txStart,refGene.txEnd"]
 *
 * resulting in a return value of:
 * {
 *    "dbSnp"   -> { "dbSnp.valid", "dbsnp.avHet" },
 *    "refGene" -> { "refGene.txStart", "refGene.txEnd" }
 * }
 *
 * Note that each Set value keeps the full "bindingName.columnName" specifier, not
 * just the column name.
 *
 * @param columnsArg The -s command line arg value.
 *
 * @return Map representing a parsed version of this arg - see above.
 */
public static Map<String, Set<String>> parseColumnsArg(String[] columnsArg) {
Map<String, Set<String>> result = new HashMap<String, Set<String>>();
for(String s : columnsArg) {
for(String columnSpecifier : s.split(",") ) {
// Each specifier must be exactly bindingName.columnName.
String[] rodNameColumnName = columnSpecifier.split("\\.");
if(rodNameColumnName.length != 2) {
throw new IllegalArgumentException("The following column specifier in the -s arg is invalid: [" + columnSpecifier + "]. It must be of the form 'bindingName.columnName'.");
}
String rodName = rodNameColumnName[0];
Set<String> requestedColumns = result.get(rodName);
if(requestedColumns == null) {
requestedColumns = new HashSet<String>();
result.put(rodName, requestedColumns);
}
requestedColumns.add(columnSpecifier);
}
}
return result;
}
// VCF INFO header line describing this annotation in the output file.
public VCFInfoHeaderLine getDescription() {
return new VCFInfoHeaderLine("GenericAnnotation", 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "For each variant in the 'variants' ROD, finds all entries in the other -B files that overlap the variant's position. ");
}
// Key under which this annotation is registered with the engine.
public String getKeyName() {
return "GenericAnnotation";
}
}

View File

@ -0,0 +1,174 @@
package org.broadinstitute.sting.playground.gatk.walkers.annotator;
import java.io.File;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.Allows;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
/**
 * Annotates variant calls with context information. Users can specify which of the available annotations to use.
 *
 * This walker drives the VariantAnnotatorEngine with GenomicAnnotation as the only
 * requested info-field annotation, writing one (or, with -exp, possibly several)
 * VCF records per input variant.
 */
//@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@Reference(window=@Window(start=-50,stop=50))
@By(DataSource.REFERENCE)
public class GenomicAnnotator extends RodWalker<Integer, Integer> {

    @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true)
    protected File VCF_OUT;

    @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false)
    protected String sampleName = null;

    @Argument(fullName="select", shortName="s", doc="Select which columns to use for each ROD file. Column #s are 0-based. (eg. The following will select columns 5,6,2 from file1.txt and columns 3,7 from file2.txt: -B my-rod,table,/path/file1.txt -B my-rod2,table,/path/file2.txt -S my-rod={5,6,2} -S my-rod2={3,7})", required=false)
    protected String[] COLUMNS = {};

    @Argument(fullName="explode", shortName="exp", doc="If more than one record from the same file matches a particular locus, create multiple entries in the output file - one for each match. WARNING: This could lead to combinatorial explosion if more than one file has more than one match at a particular locus.", required=false)
    protected Boolean EXPLODE = false;

    // Writer for the annotated output VCF; created in initialize(), closed in onTraversalDone().
    private VCFWriter vcfWriter;

    // Maps the upper-cased command-line sample name to its "variant" binding (non-VCF input only).
    private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();

    // Engine configured to run only the GenomicAnnotation info-field annotation.
    private VariantAnnotatorEngine engine;

    /**
     * Prepare the output file and the list of available features.
     */
    public void initialize() {

        // get the list of all sample names from the various VCF input rods
        TreeSet<String> samples = new TreeSet<String>();
        SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());

        // add the non-VCF sample from the command-line, if applicable
        if ( sampleName != null ) {
            nonVCFsampleName.put(sampleName.toUpperCase(), "variant");
            samples.add(sampleName.toUpperCase());
        }

        // if there are no valid samples, warn the user
        if ( samples.size() == 0 ) {
            logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
        }

        // request only the GenomicAnnotation info annotation; no genotype annotations
        engine = new VariantAnnotatorEngine(getToolkit(), new String[] { }, new String[] { "GenomicAnnotation" });
        engine.setExplode( Boolean.TRUE.equals( EXPLODE ) );
        engine.setRequestedColumns(COLUMNS);

        // setup the header fields
        Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
        hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
        hInfo.add(new VCFHeaderLine("source", "Annotator"));
        hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
        hInfo.addAll(engine.getVCFAnnotationDescriptions());

        vcfWriter = new VCFWriter(VCF_OUT);
        VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
        vcfWriter.writeHeader(vcfHeader);
    }

    /**
     * Initialize the number of loci processed to zero.
     *
     * @return 0
     */
    public Integer reduceInit() { return 0; }

    /**
     * We want reads that span deletions
     *
     * @return true
     */
    public boolean includeReadsWithDeletionAtLoci() { return true; }

    /**
     * For each site of interest, annotate based on the requested annotation types
     *
     * @param tracker the meta-data tracker
     * @param ref the reference base
     * @param context the context for the given locus
     * @return 1 if the locus was successfully processed, 0 if otherwise
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null )
            return 0;

        List<Object> rods = tracker.getReferenceMetaData("variant");
        // ignore places where we don't have a variant
        if ( rods.size() == 0 )
            return 0;

        Object variant = rods.get(0);
        VariantContext vc = VariantContextAdaptors.toVariantContext("variant", variant);
        if ( vc == null )
            return 0;

        // BUG FIX: default to emitting the un-annotated variant, mirroring
        // VariantAnnotator. Previously this was initialized to null, which caused a
        // NullPointerException in the write loop below whenever the reference base
        // was ambiguous or the pileup could not be stratified.
        Collection<VariantContext> annotatedVCs = Arrays.asList(vc);

        // if the reference base is not ambiguous, we can annotate
        if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
            Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
            if ( stratifiedContexts != null ) {
                annotatedVCs = engine.annotateContext(tracker, ref, stratifiedContexts, vc);
            }
        }

        // one output record per annotated VariantContext (explode mode can yield several)
        for ( VariantContext annotatedVC : annotatedVCs ) {
            vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
        }

        return 1;
    }

    /**
     * Increment the number of loci processed.
     *
     * @param value result of the map.
     * @param sum accumulator for the reduce.
     * @return the new number of loci processed.
     */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /**
     * Tell the user the number of loci processed and close out the new variants file.
     *
     * @param result the number of loci seen.
     */
    public void onTraversalDone(Integer result) {
        out.printf("Processed %d loci.\n", result);
        vcfWriter.close();
    }
}