Some significant changes to the annotator:

1. Annotations can now be "decorated" with an arbitrary group interface (e.g. StandardAnnotation, ExperimentalAnnotation, WorkInProgressAnnotation) - not just standard or experimental.
2. Users can now specify not only individual annotations to use (-A), but also the interface (group) names from #1 (-G).  Any number of each can be specified, e.g. -G Standard -G Experimental -A RankSumTest.
3. These same arguments can be used with the Unified Genotyper when it calls into the Annotator.
4. There are now two types of annotations: those applied to the INFO field (InfoFieldAnnotation) and those applied to specific genotypes, i.e. the FORMAT field (GenotypeAnnotation), in the VCF. However, I haven't implemented any of the latter annotations just yet; coming soon.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3029 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-18 05:38:32 +00:00
parent 58a31bab6a
commit ee0e833616
37 changed files with 357 additions and 294 deletions

View File

@ -198,7 +198,8 @@ public class VariantContext {
this.commonInfo = new InferredGeneticContext(name, negLog10PError, filters, attributes); this.commonInfo = new InferredGeneticContext(name, negLog10PError, filters, attributes);
if ( alleles == null ) { throw new StingException("Alleles cannot be null"); } if ( alleles == null ) { throw new StingException("Alleles cannot be null"); }
this.alleles = Collections.unmodifiableSet(alleleCollectionToSet(new HashSet<Allele>(), alleles)); // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles
this.alleles = Collections.unmodifiableSet(alleleCollectionToSet(new LinkedHashSet<Allele>(), alleles));
if ( genotypes == null ) { genotypes = NO_GENOTYPES; } if ( genotypes == null ) { genotypes = NO_GENOTYPES; }
this.genotypes = Collections.unmodifiableMap(genotypes); this.genotypes = Collections.unmodifiableMap(genotypes);

View File

@ -5,12 +5,13 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.TabularROD; import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class Alignability implements VariantAnnotation { public class Alignability implements InfoFieldAnnotation {
public String annotate(RefMetaDataTracker tracker, public String annotate(RefMetaDataTracker tracker,
ReferenceContext ref, ReferenceContext ref,

View File

@ -4,13 +4,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class AlleleBalance extends StandardVariantAnnotation { public class AlleleBalance implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -4,13 +4,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class DepthOfCoverage extends StandardVariantAnnotation { public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
int depth = 0; int depth = 0;

View File

@ -4,14 +4,14 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class GCContent implements VariantAnnotation { public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
double content = computeGCContent(ref); double content = computeGCContent(ref);

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
@ -11,7 +12,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Map; import java.util.Map;
public class HardyWeinberg implements VariantAnnotation { public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_GENOTYPE_QUALITY = 10;

View File

@ -4,13 +4,15 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class HomopolymerRun extends StandardVariantAnnotation { public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
@ -11,7 +12,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class LowMQ implements VariantAnnotation { public class LowMQ implements InfoFieldAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
double mq0 = 0; double mq0 = 0;

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
@ -11,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class MappingQualityZero extends StandardVariantAnnotation { public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
int mq0 = 0; int mq0 = 0;

View File

@ -4,13 +4,15 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
import java.util.ArrayList; import java.util.ArrayList;
public class QualByDepth extends StandardVariantAnnotation { public class QualByDepth implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes(); final Map<String, Genotype> genotypes = vc.getGenotypes();

View File

@ -4,18 +4,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
/** public class QualityAdjustedSecondBaseLod implements InfoFieldAnnotation, ExperimentalAnnotation {
* Created by IntelliJ IDEA.
* User: Ghost
* Date: Dec 19, 2009
* Time: 1:02:09 AM
* To change this template use File | Settings | File Templates.
*/
public class QualityAdjustedSecondBaseLod implements VariantAnnotation {
private final String KEY_NAME = "Qual_Adjusted_2blod"; private final String KEY_NAME = "Qual_Adjusted_2blod";
private final double CHI_LOD_MAX = -1000.0; private final double CHI_LOD_MAX = -1000.0;
private final SecondBaseSkew skewCalc = new SecondBaseSkew(); private final SecondBaseSkew skewCalc = new SecondBaseSkew();

View File

@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -14,7 +16,7 @@ import java.util.Map;
import java.util.ArrayList; import java.util.ArrayList;
public class RMSMappingQuality extends StandardVariantAnnotation { public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
ArrayList<Integer> qualities = new ArrayList<Integer>(); ArrayList<Integer> qualities = new ArrayList<Integer>();

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -12,7 +13,7 @@ import java.util.ArrayList;
import java.util.Map; import java.util.Map;
public abstract class RankSumTest implements VariantAnnotation { public abstract class RankSumTest implements InfoFieldAnnotation, WorkInProgressAnnotation {
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private static final double minPValue = 1e-10; private static final double minPValue = 1e-10;

View File

@ -9,18 +9,12 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import java.util.Map; import java.util.Map;
/** public class SecondBaseSkew implements InfoFieldAnnotation, ExperimentalAnnotation {
* Created by IntelliJ IDEA.
* User: chartl
* Date: Nov 16, 2009
* Time: 11:25:51 AM
* To change this template use File | Settings | File Templates.
*/
public class SecondBaseSkew implements VariantAnnotation {
private final static double epsilon = Math.pow(10.0,-12.0); private final static double epsilon = Math.pow(10.0,-12.0);
private final static String KEY_NAME = "2b_Chi"; private final static String KEY_NAME = "2b_Chi";
private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3.0, 2.0/3.0}; private final static double[] UNIFORM_ON_OFF_RATIO = {1.0/3.0, 2.0/3.0};

View File

@ -4,13 +4,15 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public class SpanningDeletions extends StandardVariantAnnotation { public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotation {
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) { public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
int deletions = 0; int deletions = 0;

View File

@ -1,6 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
public abstract class StandardVariantAnnotation implements VariantAnnotation {
}

View File

@ -2,10 +2,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
@ -27,12 +28,12 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
protected File VCF_OUT; protected File VCF_OUT;
@Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false) @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false)
protected String sampleName = null; protected String sampleName = null;
@Argument(fullName="annotations", shortName="A", doc="Annotation types to apply to variant calls", required=false) @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected String[] ANNOTATIONS; protected String[] annotationsToUse = {};
@Argument(fullName="includeExperimentalAnnotations", shortName="exp", doc="Use all possible annotations, including experimental ones", required=false) @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { };
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
protected Boolean USE_ALL_ANNOTATIONS = false; protected Boolean USE_ALL_ANNOTATIONS = false;
@Argument(fullName="useStandardAnnotations", shortName="standard", doc="Use all standard annotations", required=false)
protected Boolean USE_STANDARD_ANNOTATIONS = false;
@Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit")
protected Boolean LIST = false; protected Boolean LIST = false;
@ -40,43 +41,19 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>(); private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
private ArrayList<VariantAnnotation> requestedAnnotations; private VariantAnnotatorEngine engine;
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// how about hapmap2?
private boolean annotateHapmap2 = false;
// how about hapmap3?
private boolean annotateHapmap3 = false;
// mapping from class name to class
private static HashMap<String, VariantAnnotation> allAnnotations = null;
private static HashMap<String, VariantAnnotation> standardAnnotations = null;
private static void determineAllAnnotations() { private void listAnnotationsAndExit() {
allAnnotations = new HashMap<String, VariantAnnotation>(); List<Class<? extends InfoFieldAnnotation>> infoAnnotationClasses = PackageUtils.getClassesImplementingInterface(InfoFieldAnnotation.class);
standardAnnotations = new HashMap<String, VariantAnnotation>(); out.println("\nAvailable annotations for the VCF INFO field:");
List<Class<? extends VariantAnnotation>> annotationClasses = PackageUtils.getClassesImplementingInterface(VariantAnnotation.class); for (int i = 0; i < infoAnnotationClasses.size(); i++)
for ( Class c : annotationClasses ) { out.println("\t" + infoAnnotationClasses.get(i).getSimpleName());
try { out.println();
VariantAnnotation annot = (VariantAnnotation) c.newInstance(); List<Class<? extends GenotypeAnnotation>> genotypeAnnotationClasses = PackageUtils.getClassesImplementingInterface(GenotypeAnnotation.class);
allAnnotations.put(c.getSimpleName().toUpperCase(), annot); out.println("\nAvailable annotations for the VCF FORMAT field:");
if ( annot instanceof StandardVariantAnnotation ) for (int i = 0; i < genotypeAnnotationClasses.size(); i++)
standardAnnotations.put(c.getSimpleName().toUpperCase(), annot); out.println("\t" + genotypeAnnotationClasses.get(i).getSimpleName());
} catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate annotation class '%s': must be concrete class", c.getSimpleName()));
} catch (IllegalAccessException e) {
throw new StingException(String.format("Cannot instantiate annotation class '%s': must have no-arg constructor", c.getSimpleName()));
}
}
}
private void listFiltersAndExit() {
List<Class<? extends VariantAnnotation>> annotationClasses = PackageUtils.getClassesImplementingInterface(VariantAnnotation.class);
out.println("\nAvailable annotations:");
for (int i = 0; i < annotationClasses.size(); i++)
out.println("\t" + annotationClasses.get(i).getSimpleName());
out.println(); out.println();
System.exit(0); System.exit(0);
} }
@ -87,7 +64,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
public void initialize() { public void initialize() {
if ( LIST ) if ( LIST )
listFiltersAndExit(); listAnnotationsAndExit();
// get the list of all sample names from the various VCF input rods // get the list of all sample names from the various VCF input rods
TreeSet<String> samples = new TreeSet<String>(); TreeSet<String> samples = new TreeSet<String>();
@ -104,53 +81,17 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired."); logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired.");
} }
determineAllAnnotations(); if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(getToolkit());
if ( USE_STANDARD_ANNOTATIONS ) { else
requestedAnnotations = new ArrayList<VariantAnnotation>(standardAnnotations.values()); engine = new VariantAnnotatorEngine(getToolkit(), annotationClassesToUse, annotationsToUse);
} else if ( USE_ALL_ANNOTATIONS ) {
requestedAnnotations = new ArrayList<VariantAnnotation>(allAnnotations.values());
} else {
requestedAnnotations = new ArrayList<VariantAnnotation>();
if ( ANNOTATIONS != null ) {
for ( String requested : ANNOTATIONS ) {
VariantAnnotation annot = allAnnotations.get(requested.toUpperCase());
if ( annot == null )
throw new StingException("Unknown annotation '" + requested + "'. Issue the '-ls' argument to list available annotations.");
requestedAnnotations.add(annot);
}
}
}
// check to see whether a dbsnp rod was included
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
annotateDbsnp = true;
}
if ( rod.getName().equals("hapmap2") ) {
annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
annotateHapmap3 = true;
}
}
// setup the header fields // setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator")); hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName())); hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); hInfo.addAll(engine.getVCFAnnotationDescriptions());
if ( annotateDbsnp )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
if ( annotateHapmap2 )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 2 membership"));
if ( annotateHapmap3 )
hInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY,1,VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 3 membership"));
vcfWriter = new VCFWriter(VCF_OUT); vcfWriter = new VCFWriter(VCF_OUT);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples); VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
@ -189,120 +130,31 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
if ( rods == null || rods.size() == 0 ) if ( rods == null || rods.size() == 0 )
return 0; return 0;
Map<String, String> annotations = new HashMap<String, String>();
ReferenceOrderedDatum variant = rods.get(0); ReferenceOrderedDatum variant = rods.get(0);
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", variant); VariantContext vc = VariantContextAdaptors.toVariantContext("variant", variant);
if ( vc == null ) if ( vc == null )
return 0; return 0;
MutableVariantContext mvc = new MutableVariantContext(vc);
// if the reference base is not ambiguous, we can annotate // if the reference base is not ambiguous, we can annotate
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) { if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup()); Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
if ( stratifiedContexts != null ) if ( stratifiedContexts != null ) {
annotations = getAnnotations(tracker, ref, stratifiedContexts, vc, requestedAnnotations, annotateDbsnp, annotateHapmap2, annotateHapmap3); engine.annotateContext(tracker, ref, stratifiedContexts, mvc);
}
} }
VCFRecord record; if ( variant instanceof RodVCF ) {
if ( variant instanceof RodVCF ) RodVCF vcf = (RodVCF)variant;
record = ((RodVCF)variant).mCurrentRecord; vcfWriter.addRecord(VariantContextAdaptors.toVCF(mvc, ref.getBase(), Arrays.asList(vcf.getRecord().getGenotypeFormatString().split(VCFRecord.GENOTYPE_FIELD_SEPERATOR)), vcf.getFilterString() != null));
else } else {
record = VariantContextAdaptors.toVCF(vc, ref.getBase()); vcfWriter.addRecord(VariantContextAdaptors.toVCF(mvc, ref.getBase()));
}
record.addInfoFields(annotations);
writeVCF(tracker, record);
return 1; return 1;
} }
/**
 * Option #1: the caller does not choose annotations, so the header lines of
 * the standard annotations are returned.
 *
 * @return the description header lines of every standard annotation
 */
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
    // the annotation tables are built lazily on first use
    if ( standardAnnotations == null ) {
        determineAllAnnotations();
    }
    // the collection-based overload performs the identical TreeSet accumulation
    return getVCFAnnotationDescriptions(standardAnnotations.values());
}
/**
 * Option #2: return the header lines of every known annotation, not just
 * the standard ones.
 *
 * @return the description header lines of all annotations
 */
public static Set<VCFHeaderLine> getAllVCFAnnotationDescriptions() {
    // guard on the table that is actually read below (matches getAllAnnotations)
    if ( allAnnotations == null ) {
        determineAllAnnotations();
    }

    TreeSet<VCFHeaderLine> descriptions = new TreeSet<VCFHeaderLine>();
    for ( VariantAnnotation annotation : allAnnotations.values() ) {
        descriptions.add(annotation.getDescription());
    }
    return descriptions;
}
/**
 * Option #3: return the header lines for exactly the annotations supplied.
 *
 * @param annotations the annotations whose descriptions are wanted
 * @return the description header line of each supplied annotation, in a TreeSet
 */
public static Set<VCFHeaderLine> getVCFAnnotationDescriptions(Collection<VariantAnnotation> annotations) {
    final TreeSet<VCFHeaderLine> headerLines = new TreeSet<VCFHeaderLine>();
    for ( VariantAnnotation requested : annotations ) {
        final VCFHeaderLine line = requested.getDescription();
        headerLines.add(line);
    }
    return headerLines;
}
/**
 * Option #1: the caller does not choose annotations, so the standard
 * annotations are computed.
 *
 * @return map from annotation key to annotation value
 */
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
    // the annotation tables are built lazily on first use
    if ( standardAnnotations == null ) {
        determineAllAnnotations();
    }
    final Map<String, String> computed = getAnnotations(tracker, ref, stratifiedContexts, vc, standardAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3);
    return computed;
}
/**
 * Option #2: compute every known annotation, not just the standard ones.
 *
 * @return map from annotation key to annotation value
 */
public static Map<String, String> getAllAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
    // the annotation tables are built lazily on first use
    if ( allAnnotations == null ) {
        determineAllAnnotations();
    }
    final Map<String, String> computed = getAnnotations(tracker, ref, stratifiedContexts, vc, allAnnotations.values(), annotateDbsnp, annotateHapmap2, annotateHapmap3);
    return computed;
}
/**
 * Option #3: compute exactly the annotations supplied, plus the optional
 * dbSNP / HapMap membership flags.
 *
 * @param annotations     the annotations to evaluate
 * @param annotateDbsnp   if true, add a "0"/"1" value under VCFRecord.DBSNP_KEY
 * @param annotateHapmap2 if true, add a "0"/"1" value under VCFRecord.HAPMAP2_KEY
 * @param annotateHapmap3 if true, add a "0"/"1" value under VCFRecord.HAPMAP3_KEY
 * @return map from annotation key to annotation value; annotations that
 *         return null contribute no entry
 */
public static Map<String, String> getAnnotations(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc, Collection<VariantAnnotation> annotations, boolean annotateDbsnp, boolean annotateHapmap2, boolean annotateHapmap3) {
    final HashMap<String, String> keyToValue = new HashMap<String, String>();

    // dbSNP membership: "1" only when a real SNP record is found on the "dbsnp" track
    if ( annotateDbsnp ) {
        final boolean inDbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null)) != null;
        keyToValue.put(VCFRecord.DBSNP_KEY, inDbsnp ? "1" : "0");
    }

    // HapMap membership: "1" when the corresponding track has data at this locus
    if ( annotateHapmap2 ) {
        final boolean inHapmap2 = tracker.getTrackData("hapmap2", null) != null;
        keyToValue.put(VCFRecord.HAPMAP2_KEY, inHapmap2 ? "1" : "0");
    }
    if ( annotateHapmap3 ) {
        final boolean inHapmap3 = tracker.getTrackData("hapmap3", null) != null;
        keyToValue.put(VCFRecord.HAPMAP3_KEY, inHapmap3 ? "1" : "0");
    }

    // evaluated last, so an annotation sharing a key with a flag above overwrites it
    for ( VariantAnnotation annotator : annotations ) {
        final String value = annotator.annotate(tracker, ref, stratifiedContexts, vc);
        if ( value == null ) {
            continue;
        }
        keyToValue.put(annotator.getKeyName(), value);
    }

    return keyToValue;
}
/**
 * Writes the record to the VCF output, first filling in the dbSNP rs ID
 * when dbSNP annotation is enabled and the record has no ID of its own.
 *
 * @param tracker the meta-data tracker used to look up the "dbsnp" track
 * @param record  the record to write; its ID may be set as a side effect
 */
private void writeVCF(RefMetaDataTracker tracker, VCFRecord record) {
    if ( annotateDbsnp ) {
        // an ID counts as present only if it is non-null and not the empty-field placeholder
        final boolean hasId = record.getID() != null && !record.getID().equals(VCFRecord.EMPTY_ID_FIELD);
        if ( !hasId ) {
            final rodDbSNP snp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
            if ( snp != null ) {
                record.setID(snp.getRS_ID());
            }
        }
    }

    vcfWriter.addRecord(record);
}
/** /**
* Increment the number of loci processed. * Increment the number of loci processed.
* *

View File

@ -0,0 +1,169 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*;
/**
 * Applies a set of INFO-field and genotype (FORMAT-field) annotations to
 * variant contexts, and reports the VCF header lines describing them.
 *
 * The annotation set is either every discoverable annotation (one-argument
 * constructor) or the ones selected by group and/or class name
 * (three-argument constructor).  dbSNP / HapMap membership flags are added
 * when the corresponding reference-ordered data is bound to the engine.
 */
public class VariantAnnotatorEngine {

    // attribute key under which the dbSNP rs ID is stored on the variant context
    private static final String ID_ATTRIBUTE_KEY = "ID";

    // suffix that may be omitted from a requested name (e.g. "Standard" -> "StandardAnnotation")
    private static final String ANNOTATION_SUFFIX = "Annotation";

    private List<InfoFieldAnnotation> requestedInfoAnnotations;
    private List<GenotypeAnnotation> requestedGenotypeAnnotations;

    // should we annotate dbsnp?
    private boolean annotateDbsnp = false;
    // how about hapmap2?
    private boolean annotateHapmap2 = false;
    // how about hapmap3?
    private boolean annotateHapmap3 = false;

    /**
     * Use this constructor if you want all possible annotations.
     *
     * @param engine the GATK engine, inspected for dbSNP / HapMap ROD bindings
     */
    public VariantAnnotatorEngine(GenomeAnalysisEngine engine) {
        List<Class<? extends InfoFieldAnnotation>> infoAnnotationClasses = PackageUtils.getClassesImplementingInterface(InfoFieldAnnotation.class);
        requestedInfoAnnotations = getInstances(infoAnnotationClasses);
        List<Class<? extends GenotypeAnnotation>> genotypeAnnotationClasses = PackageUtils.getClassesImplementingInterface(GenotypeAnnotation.class);
        requestedGenotypeAnnotations = getInstances(genotypeAnnotationClasses);
        initialize(engine);
    }

    /**
     * Use this constructor if you want to select specific annotations (and/or interfaces).
     *
     * @param engine                 the GATK engine, inspected for dbSNP / HapMap ROD bindings
     * @param annotationClassesToUse simple names of annotation groups (interfaces); every class
     *                               implementing a named group is used.  May be null (treated as empty).
     * @param annotationsToUse       simple names of individual annotation classes.  May be null
     *                               (treated as empty).
     * @throws StingException if a requested name (with or without the "Annotation" suffix) is unknown
     */
    public VariantAnnotatorEngine(GenomeAnalysisEngine engine, String[] annotationClassesToUse, String[] annotationsToUse) {
        // create a map for all annotation classes which implement our top-level interfaces
        Map<String, Class<?>> classMap = new HashMap<String, Class<?>>();
        for ( Class<?> c : PackageUtils.getClassesImplementingInterface(InfoFieldAnnotation.class) )
            classMap.put(c.getSimpleName(), c);
        for ( Class<?> c : PackageUtils.getClassesImplementingInterface(GenotypeAnnotation.class) )
            classMap.put(c.getSimpleName(), c);
        for ( Class<?> c : PackageUtils.getInterfacesExtendingInterface(AnnotationType.class) )
            classMap.put(c.getSimpleName(), c);

        // insertion-ordered so that explicitly requested annotations are instantiated (and later
        // applied) in a reproducible order; Class hash codes differ from one JVM run to the next
        Set<Class<?>> classes = new LinkedHashSet<Class<?>>();

        // get the classes from the provided groups (interfaces)
        if ( annotationClassesToUse != null ) {
            for ( String group : annotationClassesToUse )
                classes.addAll(PackageUtils.getClassesImplementingInterface(resolveClass(classMap, group)));
        }

        // get the specific classes provided
        if ( annotationsToUse != null ) {
            for ( String annotation : annotationsToUse )
                classes.add(resolveClass(classMap, annotation));
        }

        // get the instances; a class that is neither kind of annotation (e.g. a group
        // interface passed as a specific annotation) is skipped
        requestedInfoAnnotations = new ArrayList<InfoFieldAnnotation>();
        requestedGenotypeAnnotations = new ArrayList<GenotypeAnnotation>();
        for ( Class<?> c : classes ) {
            if ( InfoFieldAnnotation.class.isAssignableFrom(c) )
                requestedInfoAnnotations.add(getInstance(c.asSubclass(InfoFieldAnnotation.class)));
            else if ( GenotypeAnnotation.class.isAssignableFrom(c) )
                requestedGenotypeAnnotations.add(getInstance(c.asSubclass(GenotypeAnnotation.class)));
        }

        initialize(engine);
    }

    // look up a requested name in the class map, retrying with the "Annotation" suffix appended
    private static Class<?> resolveClass(Map<String, Class<?>> classMap, String name) {
        Class<?> resolved = classMap.get(name);
        if ( resolved == null )
            resolved = classMap.get(name + ANNOTATION_SUFFIX);
        if ( resolved == null )
            throw new StingException("Class " + name + " is not found; please check that you have specified the class name correctly");
        return resolved;
    }

    // instantiate every class in the list via its no-arg constructor
    private static <T> List<T> getInstances(List<Class<? extends T>> classes) {
        List<T> objects = new ArrayList<T>(classes.size());
        for ( Class<? extends T> c : classes )
            objects.add(getInstance(c));
        return objects;
    }

    // instantiate a single class reflectively, keeping the underlying failure as the cause
    private static <T> T getInstance(Class<T> c) {
        try {
            return c.newInstance();
        } catch (InstantiationException e) {
            throw new StingException(String.format("Cannot instantiate annotation class '%s': must be concrete class", c.getSimpleName()), e);
        } catch (IllegalAccessException e) {
            throw new StingException(String.format("Cannot instantiate annotation class '%s': must have no-arg constructor", c.getSimpleName()), e);
        }
    }

    // decide which membership flags to emit based on the RODs bound to the engine
    private void initialize(GenomeAnalysisEngine engine) {
        // check to see whether a dbsnp rod was included
        List<ReferenceOrderedDataSource> dataSources = engine.getRodDataSources();
        for ( ReferenceOrderedDataSource source : dataSources ) {
            ReferenceOrderedData rod = source.getReferenceOrderedData();
            if ( rod.getType().equals(rodDbSNP.class) ) {
                annotateDbsnp = true;
            }
            if ( rod.getName().equals("hapmap2") ) {
                annotateHapmap2 = true;
            }
            if ( rod.getName().equals("hapmap3") ) {
                annotateHapmap3 = true;
            }
        }
    }

    /**
     * @return the header lines describing every requested annotation, plus the
     *         dbSNP / HapMap membership lines for whichever of those are enabled
     */
    public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
        Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();

        for ( InfoFieldAnnotation annotation : requestedInfoAnnotations )
            descriptions.add(annotation.getDescription());
        for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations )
            descriptions.add(annotation.getDescription());

        if ( annotateDbsnp )
            descriptions.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
        if ( annotateHapmap2 )
            descriptions.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 2 membership"));
        if ( annotateHapmap3 )
            descriptions.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Hapmap 3 membership"));

        return descriptions;
    }

    /**
     * Annotates the given variant context in place.
     *
     * @param tracker            the meta-data tracker used to look up the dbsnp / hapmap tracks
     * @param ref                the reference context, passed through to each annotation
     * @param stratifiedContexts the alignment contexts split by sample, passed through to each annotation
     * @param vc                 the context to annotate; attributes are added to it
     */
    public void annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, MutableVariantContext vc) {

        // annotate dbsnp occurrence
        // NOTE(review): dbSNP is detected by ROD type in initialize() but fetched here by the
        // track name "dbsnp" -- confirm the binding is always given that name
        if ( annotateDbsnp ) {
            rodDbSNP dbsnp = rodDbSNP.getFirstRealSNP(tracker.getTrackData("dbsnp", null));
            vc.putAttribute(VCFRecord.DBSNP_KEY, dbsnp == null ? "0" : "1");
            // annotate dbsnp id if available and not already there
            if ( dbsnp != null && !vc.hasAttribute(ID_ATTRIBUTE_KEY) )
                vc.putAttribute(ID_ATTRIBUTE_KEY, dbsnp.getRS_ID());
        }

        if ( annotateHapmap2 ) {
            RODRecordList hapmap2 = tracker.getTrackData("hapmap2", null);
            vc.putAttribute(VCFRecord.HAPMAP2_KEY, hapmap2 == null ? "0" : "1");
        }

        if ( annotateHapmap3 ) {
            RODRecordList hapmap3 = tracker.getTrackData("hapmap3", null);
            vc.putAttribute(VCFRecord.HAPMAP3_KEY, hapmap3 == null ? "0" : "1");
        }

        // INFO annotations return their value (null means "no annotation") ...
        for ( InfoFieldAnnotation annotation : requestedInfoAnnotations ) {
            String annot = annotation.annotate(tracker, ref, stratifiedContexts, vc);
            if ( annot != null ) {
                vc.putAttribute(annotation.getKeyName(), annot);
            }
        }

        // ... whereas genotype annotations are handed the context itself
        for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
            annotation.annotateContext(tracker, ref, stratifiedContexts, vc);
        }
    }
}

View File

@ -0,0 +1,3 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
/**
 * Root marker interface for annotation groups.  VariantAnnotatorEngine
 * discovers the interfaces that extend this one and lets callers select
 * annotations by those interfaces' simple names.
 */
public interface AnnotationType {}

View File

@ -0,0 +1,3 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
/**
 * Marker interface: annotation classes implementing it belong to the
 * "Experimental" annotation group.
 */
public interface ExperimentalAnnotation extends AnnotationType {}

View File

@ -0,0 +1,22 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
import java.util.Map;
/**
 * An annotation that applies to specific genotypes (the VCF FORMAT field)
 * rather than to the INFO field.
 */
public interface GenotypeAnnotation {

    /**
     * Annotate the given record for the given variation and context split by sample.
     */
    void annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc);

    /**
     * @return the FORMAT key
     */
    String getKeyName();

    /**
     * @return the description used for the VCF FORMAT meta field
     */
    VCFFormatHeaderLine getDescription();
}

View File

@ -1,15 +1,14 @@
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map; import java.util.Map;
public interface InfoFieldAnnotation {
public interface VariantAnnotation {
// return the annotation for the given variation and context split by sample (return null for no annotation) // return the annotation for the given variation and context split by sample (return null for no annotation)
public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc); public String annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc);

View File

@ -0,0 +1,3 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
/**
 * Marker interface: annotation classes implementing it belong to the
 * "Standard" annotation group.
 */
public interface StandardAnnotation extends AnnotationType {}

View File

@ -0,0 +1,3 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
/**
 * Marker interface: annotation classes implementing it belong to the
 * "WorkInProgress" annotation group.
 */
public interface WorkInProgressAnnotation extends AnnotationType {}

View File

@ -53,9 +53,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false) @Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false; public boolean NO_SLOD = false;
@Argument(fullName = "include_experimental_annotations", shortName = "exp", doc = "Annotate calls with all annotations, including experimental ones", required = false)
public boolean ALL_ANNOTATIONS = false;
// control the error modes // control the error modes
@Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false) @Argument(fullName = "assume_single_sample_reads", shortName = "single_sample", doc = "The single sample that we should assume is represented in the input bam (and therefore associate with all reads regardless of whether they have read groups)", required = false)

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.*; import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
@ -59,9 +59,18 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false) @Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
public PrintStream beagleWriter = null; public PrintStream beagleWriter = null;
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected String[] annotationsToUse = {};
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { "Standard" };
// the calculation arguments // the calculation arguments
private UnifiedGenotyperEngine UG_engine = null; private UnifiedGenotyperEngine UG_engine = null;
// the annotation engine
private VariantAnnotatorEngine annotationEngine;
// enable deletions in the pileup // enable deletions in the pileup
public boolean includeReadsWithDeletionAtLoci() { return true; } public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -95,7 +104,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
**/ **/
public void initialize() { public void initialize() {
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, verboseWriter, beagleWriter); annotationEngine = new VariantAnnotatorEngine(getToolkit(), annotationClassesToUse, annotationsToUse);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, verboseWriter, beagleWriter, annotationEngine);
// initialize the writers // initialize the writers
if ( verboseWriter != null ) { if ( verboseWriter != null ) {
@ -129,11 +139,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper")); headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// annotation (INFO) fields from VariantAnnotator // all annotation fields from VariantAnnotatorEngine
if ( UAC.ALL_ANNOTATIONS ) headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
else
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper // annotation (INFO) fields from UnifiedGenotyper
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency")); headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantConte
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.*;
@ -57,6 +57,9 @@ public class UnifiedGenotyperEngine {
// the unified argument collection // the unified argument collection
protected UnifiedArgumentCollection UAC = null; protected UnifiedArgumentCollection UAC = null;
// the annotation engine
protected VariantAnnotatorEngine annotationEngine;
// the model used for calculating genotypes // the model used for calculating genotypes
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>(); protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
@ -71,20 +74,21 @@ public class UnifiedGenotyperEngine {
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
initialize(toolkit, UAC, null, null, null, null); initialize(toolkit, UAC, null, null, null, null, null);
} }
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter, VariantAnnotatorEngine engine) {
initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, beagleWriter); initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, beagleWriter, engine);
} }
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) { private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter, VariantAnnotatorEngine engine) {
this.UAC = UAC; this.UAC = UAC;
this.logger = logger; this.logger = logger;
this.genotypeWriter = genotypeWriter; this.genotypeWriter = genotypeWriter;
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
this.beagleWriter = beagleWriter; this.beagleWriter = beagleWriter;
this.annotationEngine = engine;
// deal with input errors // deal with input errors
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.INDELS && !(genotypeWriter instanceof VCFGenotypeWriter) ) { if ( UAC.genotypeModel == GenotypeCalculationModel.Model.INDELS && !(genotypeWriter instanceof VCFGenotypeWriter) ) {
@ -223,13 +227,7 @@ public class UnifiedGenotyperEngine {
if ( call != null && call.vc != null ) { if ( call != null && call.vc != null ) {
// first off, we want to use the *unfiltered* context for the annotations // first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup()); stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, (MutableVariantContext)call.vc);
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.vc, annotateDbsnp, annotateHapmap2, annotateHapmap3);
((MutableVariantContext)call.vc).putAttributes(annotations);
} }
} }

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator; package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
@ -19,7 +19,7 @@ import java.util.Map;
* Time: 2:48:15 PM * Time: 2:48:15 PM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class ProportionOfNonrefBasesSupportingSNP implements VariantAnnotation { public class ProportionOfNonrefBasesSupportingSNP implements InfoFieldAnnotation {
private String KEY_NAME = "prop_nonref_that_are_snp"; private String KEY_NAME = "prop_nonref_that_are_snp";
public String getKeyName() { return KEY_NAME; } public String getKeyName() { return KEY_NAME; }

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator; package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
@ -20,7 +20,7 @@ import java.util.Map;
* Time: 2:18:43 PM * Time: 2:18:43 PM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class ProportionOfRefSecondBasesSupportingSNP implements VariantAnnotation { public class ProportionOfRefSecondBasesSupportingSNP implements InfoFieldAnnotation {
private String KEY_NAME = "ref_2bb_snp_prop"; private String KEY_NAME = "ref_2bb_snp_prop";
private boolean USE_MAPQ0_READS = false; private boolean USE_MAPQ0_READS = false;

View File

@ -4,7 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
@ -20,7 +20,7 @@ import java.util.Map;
* Time: 2:42:05 PM * Time: 2:42:05 PM
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class ProportionOfSNPSecondBasesSupportingRef implements VariantAnnotation { public class ProportionOfSNPSecondBasesSupportingRef implements InfoFieldAnnotation {
public String KEY_NAME = "SNP_2B_SUPPORT_REF"; public String KEY_NAME = "SNP_2B_SUPPORT_REF";
public boolean USE_MAPQ0_READS = false; public boolean USE_MAPQ0_READS = false;
public String debug_file = "/humgen/gsa-scr1/chartl/temporary/ProportionOfRefSecondBasesSupportingSNP.debug.txt"; public String debug_file = "/humgen/gsa-scr1/chartl/temporary/ProportionOfRefSecondBasesSupportingSNP.debug.txt";

View File

@ -5,7 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD; import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
@ -17,7 +17,7 @@ import java.util.Map;
* @author chartl * @author chartl
* @date Feb 1, 2010 * @date Feb 1, 2010
*/ */
public class ThousandGenomesAnnotator implements VariantAnnotation { public class ThousandGenomesAnnotator implements InfoFieldAnnotation {
public String getKeyName() { public String getKeyName() {
return "1KG"; return "1KG";

View File

@ -27,7 +27,6 @@ import java.util.*;
// todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects // todo -- write a simple column table system and have the evaluators return this instead of the list<list<string>> objects
// todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers) // todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers)
// todo -- allele freqeuncy discovery tool (FREQ in true vs. discovery counts in eval). Needs to process subset of samples in true (pools)
// todo -- clustered SNP counter // todo -- clustered SNP counter
// todo -- HWEs // todo -- HWEs
// todo -- indel metrics [count of sizes in/del should be in CountVariants] // todo -- indel metrics [count of sizes in/del should be in CountVariants]
@ -84,12 +83,10 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// todo -- add doc string @Argument(shortName="select", doc="One or more stratifications to use when evaluating the data", required=false)
@Argument(shortName="select", doc="", required=false)
protected String[] SELECT_EXPS = {"QUAL > 500.0", "HARD_TO_VALIDATE==1", "GATK_STANDARD==1"}; protected String[] SELECT_EXPS = {"QUAL > 500.0", "HARD_TO_VALIDATE==1", "GATK_STANDARD==1"};
// todo -- add doc string @Argument(shortName="selectName", doc="Names to use for the list of stratifications (must be a 1-to-1 mapping)", required=false)
@Argument(shortName="selectName", doc="", required=false)
protected String[] SELECT_NAMES = {"q500plus", "low_mapq", "gatk_std_filters"}; protected String[] SELECT_NAMES = {"q500plus", "low_mapq", "gatk_std_filters"};
@Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false)

View File

@ -52,4 +52,22 @@ public class PackageUtils {
return concreteTypes; return concreteTypes;
} }
/**
 * Finds the non-concrete subtypes of the given interface.
 *
 * Every subtype of iface reported by the reflections scanner is examined, and only
 * those for which JVMUtils.isConcrete returns false are kept.
 *
 * NOTE(review): whether abstract classes can end up in the result depends on how
 * JVMUtils.isConcrete classifies them; that helper is not visible here -- confirm.
 *
 * @param iface the interface which returned classes should extend.
 * @return the list of non-concrete types extending iface, in the iteration order of the scanned set.
 */
public static <T> List<Class<? extends T>> getInterfacesExtendingInterface(Class<T> iface) {
    List<Class<? extends T>> extendingInterfaces = new ArrayList<Class<? extends T>>();
    // Walk every known subtype of iface and skip the ones flagged as concrete.
    for ( Class<? extends T> candidate : reflections.getSubTypesOf(iface) ) {
        if ( JVMUtils.isConcrete(candidate) ) {
            continue;
        }
        extendingInterfaces.add(candidate);
    }
    return extendingInterfaces;
}
} }

View File

@ -85,9 +85,6 @@ public class VCFGenotypeRecord {
// make sure the GT field isn't being set // make sure the GT field isn't being set
if ( key.equals(GENOTYPE_KEY) ) if ( key.equals(GENOTYPE_KEY) )
throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally"); throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally");
// we need to be backwards compatible
if ( key.equals(OLD_DEPTH_KEY) )
key = DEPTH_KEY;
mFields.put(key, value); mFields.put(key, value);
} }

View File

@ -172,6 +172,8 @@ public class VCFRecord {
case INFO: case INFO:
String vals[] = columnValues.get(val).split(";"); String vals[] = columnValues.get(val).split(";");
for (String alt : vals) { for (String alt : vals) {
if ( alt.equals(EMPTY_INFO_FIELD) )
continue;
String keyVal[] = alt.split("="); String keyVal[] = alt.split("=");
if ( keyVal.length == 1 ) if ( keyVal.length == 1 )
addInfoField(keyVal[0], ""); addInfoField(keyVal[0], "");
@ -391,11 +393,6 @@ public class VCFRecord {
* @return a map, of the info key-value pairs * @return a map, of the info key-value pairs
*/ */
public final Map<String, String> getInfoValues() { public final Map<String, String> getInfoValues() {
if (mInfoFields.size() < 1) {
Map<String, String> map = new HashMap<String, String>();
map.put(".", "");
return map;
}
return mInfoFields; return mInfoFields;
} }
@ -497,10 +494,6 @@ public class VCFRecord {
public void addInfoField(String key, String value) { public void addInfoField(String key, String value) {
//System.out.printf("Adding info field %s=%s%n", key, value); //System.out.printf("Adding info field %s=%s%n", key, value);
mInfoFields.put(key, value); mInfoFields.put(key, value);
// remove the empty token if it's present
if ( mInfoFields.containsKey(".") )
mInfoFields.remove(".");
} }
public void printInfoFields() { public void printInfoFields() {
@ -587,15 +580,18 @@ public class VCFRecord {
* @return a string representing the information fields * @return a string representing the information fields
*/ */
protected String createInfoString() { protected String createInfoString() {
String info = ""; StringBuffer info = new StringBuffer();
for (String str : getInfoValues().keySet()) { boolean isFirst = true;
for (String str : mInfoFields.keySet()) {
if (str.equals(EMPTY_INFO_FIELD)) if ( isFirst )
return EMPTY_INFO_FIELD; isFirst = false;
else else
info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR; info.append(INFO_FIELD_SEPERATOR);
info.append(str);
info.append("=");
info.append(mInfoFields.get(str));
} }
return (info.contains(INFO_FIELD_SEPERATOR)) ? info.substring(0, info.lastIndexOf(INFO_FIELD_SEPERATOR)) : info; return info.length() == 0 ? EMPTY_INFO_FIELD : info.toString();
} }
/** /**

View File

@ -65,7 +65,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test @Test
public void testHasAnnotsAsking1() { public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("98bcbd4dd9d0edc5aa1ae97877a7e8f8")); Arrays.asList("98bcbd4dd9d0edc5aa1ae97877a7e8f8"));
executeTest("test file has annotations, asking for annotations, #1", spec); executeTest("test file has annotations, asking for annotations, #1", spec);
} }
@ -73,7 +73,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test @Test
public void testHasAnnotsAsking2() { public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("74d1dc2a65f4398cd8c11b294917dfe5")); Arrays.asList("74d1dc2a65f4398cd8c11b294917dfe5"));
executeTest("test file has annotations, asking for annotations, #2", spec); executeTest("test file has annotations, asking for annotations, #2", spec);
} }
@ -97,7 +97,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test @Test
public void testNoAnnotsAsking1() { public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("437b45d2ee1a150f15c4479cb2cb5e8f")); Arrays.asList("437b45d2ee1a150f15c4479cb2cb5e8f"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec); executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
} }
@ -105,7 +105,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test @Test
public void testNoAnnotsAsking2() { public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -standard -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("857875a3234112bcd047cf31683aa331")); Arrays.asList("857875a3234112bcd047cf31683aa331"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec); executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
} }

View File

@ -22,7 +22,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() { public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("c30af5d192661abd77b05a316f1d8923")); Arrays.asList("c91f44a198cd7222520118726ea806ca"));
executeTest("testPooled1", spec); executeTest("testPooled1", spec);
} }
@ -35,7 +35,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() { public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("882b2fae1cd1ba65cac3cadacec0ce2b")); Arrays.asList("d8af2cb687aa89d21c5492c98f100b5f"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec); executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
} }
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2Joint() { public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("aa0cff414e6623c36465726a987a645d")); Arrays.asList("724bc2b640e111df82b9ebd261ddb5d9"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec); executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
} }
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2Joint() { public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("53df224164083cc7d8ad85f3d16ba38f")); Arrays.asList("304ec09a459705f5738a9a82b603ae1f"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec); executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
} }
@ -64,7 +64,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testParallelization() { public void testParallelization() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1, "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30 -nt 4", 1,
Arrays.asList("3ade750c0d261594ea549db7b127a1e3")); Arrays.asList("33e9fe3b8c1ed729c22196d5db3e0d11"));
executeTest("test parallelization", spec); executeTest("test parallelization", spec);
} }
@ -77,11 +77,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testParameter() { public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "bee9fa71d70fdde094ab30785d4fa84e" ); e.put( "-genotype", "fb3ffa0f101cf9f8ffc6892b0acab414" );
e.put( "-all_bases", "410cff9d97cd017becd1f6260c7abeeb" ); e.put( "-all_bases", "3888d0856370f9a5b18c078e2caaec2a" );
e.put( "--min_base_quality_score 26", "85e1c35d3926afc68761aefea3f41332" ); e.put( "--min_base_quality_score 26", "66f729d1948dc057486832731278c226" );
e.put( "--min_mapping_quality_score 26", "1c49a7d5e6ad295c0450b8a35053050f" ); e.put( "--min_mapping_quality_score 26", "80a7fca199b899a3d0bc1293eb7bf7e5" );
e.put( "--max_mismatches_in_40bp_window 5", "7e7db5a0d859704e12a4b89d35065682" ); e.put( "--max_mismatches_in_40bp_window 5", "c6f8846865dcd9021372df917f6c962b" );
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -95,7 +95,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() { public void testConfidence() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1, "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 10 ", 1,
Arrays.asList("c67dd3e97cb188b117074d2c4692fcfa")); Arrays.asList("7854c02fcc0c8fcc879f6e35fef2e11f"));
executeTest("testConfidence", spec); executeTest("testConfidence", spec);
} }
@ -106,7 +106,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Test @Test
public void testOtherOutput() { public void testOtherOutput() {
String[] md5s = {"ce0024816a092af9f998a7561ffb4fb2", "8cba0b8752f18fc620b4697840bc7291"}; String[] md5s = {"5f3b9abe1b2c30c2ede0007c43e1934c", "8cba0b8752f18fc620b4697840bc7291"};
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper" + "-T UnifiedGenotyper" +
" -R " + oneKGLocation + "reference/human_b36_both.fasta" + " -R " + oneKGLocation + "reference/human_b36_both.fasta" +