Refactored and generalized the database/comp annotations in VariantAnnotator. Now one can provide comp tracks as with VariantEval (e.g. compHapMap, comp1KG_CEU) and the INFO field will be annotated with the track name (without the 'comp') if the variant record overlaps a comp site (e.g. ...;1KG_CEU;...). This means that you can now pass 1kg calls to the Unified Genotyper and automatically have records annotated with their presence in 1kg.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3684 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-06-30 16:37:31 +00:00
parent cf910d9cc2
commit 944dbb94ce
3 changed files with 29 additions and 45 deletions

View File

@ -34,13 +34,13 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
@ -52,7 +52,7 @@ import java.util.*;
/**
* Annotates variant calls with context information. Users can specify which of the available annotations to use.
*/
//@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariationRod.class))
@Requires(value={},referenceMetaData=@RMD(name="variant",type=ReferenceOrderedDatum.class))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@Reference(window=@Window(start=-50,stop=50))
@By(DataSource.REFERENCE)
@ -114,9 +114,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
if ( LIST )
listAnnotationsAndExit();
// get the list of all sample names from the various VCF input rods
TreeSet<String> samples = new TreeSet<String>();
SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
// get the list of all sample names from the variant VCF input rod, if applicable
Set<String> rodName = new HashSet<String>();
rodName.add("variant");
TreeSet<String> samples = new TreeSet<String>(SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName));
// add the non-VCF sample from the command-line, if applicable
if ( sampleName != null ) {

View File

@ -48,7 +48,6 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
@ -61,17 +60,14 @@ import org.broadinstitute.sting.utils.classloader.PackageUtils;
public class VariantAnnotatorEngine {
public static final String dbPrefix = "comp";
private ArrayList<InfoFieldAnnotation> requestedInfoAnnotations;
private ArrayList<GenotypeAnnotation> requestedGenotypeAnnotations;
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// how about hapmap2?
private boolean annotateHapmap2 = false;
// how about hapmap3?
private boolean annotateHapmap3 = false;
private HashMap<String, String> dbAnnotations = new HashMap<String, String>();
// command-line option from GenomicAnnotator.
// command-line option from GenomicAnnotator.
private Map<String, Set<String>> requestedColumnsMap;
// command-line option from GenomicAnnotator.
@ -164,15 +160,11 @@ public class VariantAnnotatorEngine {
// check to see whether a dbsnp rod was included
List<ReferenceOrderedDataSource> dataSources = engine.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
annotateDbsnp = true;
if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFRecord.DBSNP_KEY);
}
if ( rod.getName().equals("hapmap2") ) {
annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
annotateHapmap3 = true;
else if ( source.getName().startsWith(dbPrefix) ) {
dbAnnotations.put(source.getName(), source.getName().substring(dbPrefix.length()));
}
}
}
@ -185,12 +177,8 @@ public class VariantAnnotatorEngine {
descriptions.addAll(annotation.getDescriptions());
for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations )
descriptions.addAll(annotation.getDescriptions());
if ( annotateDbsnp )
descriptions.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Flag, "dbSNP Membership"));
if ( annotateHapmap2 )
descriptions.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Flag, "Hapmap 2 Membership"));
if ( annotateHapmap3 )
descriptions.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Flag, "Hapmap 3 Membership"));
for ( Map.Entry<String, String> dbSet : dbAnnotations.entrySet() )
descriptions.add(new VCFInfoHeaderLine(dbSet.getValue(), 0, VCFInfoHeaderLine.INFO_TYPE.Flag, (dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ? "dbSNP" : dbSet.getValue()) + " Membership"));
return descriptions;
}
@ -199,23 +187,18 @@ public class VariantAnnotatorEngine {
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
// annotate dbsnp occurrence
if ( annotateDbsnp ) {
DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
infoAnnotations.put(VCFRecord.DBSNP_KEY, dbsnp == null ? false : true);
// annotate dbsnp id if available and not already there
if ( dbsnp != null && (!vc.hasAttribute("ID") || vc.getAttribute("ID").equals(VCFRecord.EMPTY_ID_FIELD)) )
infoAnnotations.put("ID", dbsnp.getRsID());
}
if ( annotateHapmap2 ) {
List<Object> hapmap2 = tracker.getReferenceMetaData("hapmap2");
infoAnnotations.put(VCFRecord.HAPMAP2_KEY, hapmap2.size() == 0 ? false : true);
}
if ( annotateHapmap3 ) {
List<Object> hapmap3 = tracker.getReferenceMetaData("hapmap3");
infoAnnotations.put(VCFRecord.HAPMAP3_KEY, hapmap3.size() == 0 ? false : true);
// annotate db occurrences
for ( Map.Entry<String, String> dbSet : dbAnnotations.entrySet() ) {
if ( dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
infoAnnotations.put(VCFRecord.DBSNP_KEY, dbsnp == null ? false : true);
// annotate dbsnp id if available and not already there
if ( dbsnp != null && (!vc.hasAttribute("ID") || vc.getAttribute("ID").equals(VCFRecord.EMPTY_ID_FIELD)) )
infoAnnotations.put("ID", dbsnp.getRsID());
} else {
List<Object> dbRod = tracker.getReferenceMetaData(dbSet.getKey());
infoAnnotations.put(dbSet.getValue(), dbRod.size() == 0 ? false : true);
}
}
//Process the info field

View File

@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTag() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("c3e0361af8e98bda1bc3a27260cb2c4a"));
Arrays.asList("39a6b268e3edc1eee5f71d77460e448e"));
executeTest("getting DB tag", spec);
}
}