Now, if a dbsnp rod is passed to either the UnifiedGenotyper or VariantAnnotator, a DB=0/1 annotation is added (in addition to filling in the ID field); this is in line with 1KG project calls. If no dbsnp rod is used, the annotation is not added (as opposed to setting every entry to DB=0).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2678 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-25 17:27:12 +00:00
parent 5d2f8aaa54
commit dc170caafc
2 changed files with 50 additions and 11 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.*;
@ -41,6 +42,9 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
private ArrayList<VariantAnnotation> requestedAnnotations;
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// mapping from class name to class
private static HashMap<String, VariantAnnotation> allAnnotations = null;
private static HashMap<String, VariantAnnotation> standardAnnotations = null;
@ -115,12 +119,24 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
}
}
// check to see whether a dbsnp rod was included
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
annotateDbsnp = true;
break;
}
}
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations));
if ( annotateDbsnp )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
vcfWriter = new VCFWriter(VCF_OUT);
vcfHeader = new VCFHeader(hInfo, samples);
@ -166,7 +182,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
if ( stratifiedContexts != null )
annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations);
annotations = getAnnotations(tracker, ref, stratifiedContexts, variant, requestedAnnotations, annotateDbsnp);
}
writeVCF(tracker, ref, context, variant, annotations);
@ -208,24 +224,30 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
}
// option #1: don't specify annotations to be used: standard annotations are used by default
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp) {
if ( standardAnnotations == null )
determineAllAnnotations();
return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values());
return getAnnotations(tracker, ref, stratifiedContexts, variation, standardAnnotations.values(), annotateDbsnp);
}
// option #2: specify that all possible annotations be used
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, boolean annotateDbsnp) {
if ( allAnnotations == null )
determineAllAnnotations();
return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values());
return getAnnotations(tracker, ref, stratifiedContexts, variation, allAnnotations.values(), annotateDbsnp);
}
// option #3: specify the exact annotations to be used
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations) {
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation, Collection<VariantAnnotation> annotations, boolean annotateDbsnp) {
HashMap<String, String> results = new HashMap<String, String>();
// annotate dbsnp occurrence
if ( annotateDbsnp ) {
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
results.put(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
}
for ( VariantAnnotation annotator : annotations) {
String annot = annotator.annotate(tracker, ref, stratifiedContexts, variation);
if ( annot != null ) {
@ -236,13 +258,12 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
return results;
}
private void writeVCF(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariationRod variant, Map<String, String> annotations) {
VCFRecord rec = getVCFRecord(tracker, ref, context, variant);
if ( rec != null ) {
rec.addInfoFields(annotations);
// also, annotate dbsnp id if available and not already there
if ( rec.getID() == null || rec.getID().equals(".") ) {
if ( annotateDbsnp && (rec.getID() == null || rec.getID().equals(".")) ) {
rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
if ( dbsnp != null )
rec.setID(dbsnp.getRS_ID());

View File

@ -26,8 +26,11 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
import org.broadinstitute.sting.utils.*;
@ -66,7 +69,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// samples in input
private Set<String> samples = new HashSet<String>();
/** Enable deletions in the pileup **/
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// Enable deletions in the pileup
public boolean includeReadsWithDeletionAtLoci() { return true; }
/**
@ -162,6 +168,16 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
beagleWriter.println();
}
// check to see whether a dbsnp rod was included
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
annotateDbsnp = true;
break;
}
}
// *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. ***
if ( writer == null )
@ -192,6 +208,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// annotation (INFO) fields from UnifiedGenotyper
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
if ( annotateDbsnp )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
@ -274,9 +292,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation);
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation);
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp);
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
}