diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index ebfcc0c29..f5e936a09 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -92,7 +92,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = integrateActiveList( isActiveList ); logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." ); - workQueue.addAll( activeRegions ); + if( walker.activeRegionOutStream == null ) { + workQueue.addAll( activeRegions ); + } else { // Just want to output the active regions to a file, not actually process them + for( final ActiveRegion activeRegion : activeRegions ) { + if( activeRegion.isActive ) { + walker.activeRegionOutStream.println( activeRegion.getLocation() ); + } + } + } + // Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them if( !workQueue.isEmpty() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index d7e170d73..98308ee11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -1,6 +1,11 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.IntervalBinding; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; @@ -14,8 +19,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; +import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; +import java.io.PrintStream; import java.util.ArrayList; import java.util.List; @@ -32,6 +39,31 @@ import java.util.List; @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class}) public abstract class ActiveRegionWalker extends Walker { + @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) + public PrintStream activeRegionOutStream = null; + + @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false) + protected List> activeRegionBindings = null; + + public GenomeLocSortedSet presetActiveRegions = null; + + @Override + public void initialize() { + if( activeRegionBindings == null ) { return; } + List allIntervals = new ArrayList(0); + for ( IntervalBinding intervalBinding : activeRegionBindings ) { + List intervals = intervalBinding.getIntervals(this.getToolkit()); + + if ( intervals.isEmpty() ) { + logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed."); + } + + allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, IntervalSetRule.UNION); + } + + presetActiveRegions = IntervalUtils.sortAndMergeIntervals(this.getToolkit().getGenomeLocParser(), allIntervals, IntervalMergingRule.ALL); + } + // Do we actually want to operate on the context? public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { return true; // We are keeping all the reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index b9e6a5b2b..889cc634c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -23,7 +24,7 @@ import java.util.*; * Time: 12:24 PM * To change this template use File | Settings | File Templates. */ -public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { +public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { private MendelianViolation mendelianViolation = null; private String motherId; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 69560c7cb..5312c4136 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -84,7 +83,6 @@ public class VariantAnnotator extends RodWalker implements Ann @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); - public RodBinding getVariantRodBinding() { return variantCollection.variants; } /** * The INFO field will be annotated with information on the most biologically-significant effect @@ -163,6 +161,13 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") protected Boolean LIST = false; + /** + * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty. + */ + @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated") + protected Boolean ALWAYS_APPEND_DBSNP_ID = false; + public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; } + @Hidden @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 98d2fe17b..90d0ad740 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -195,11 +195,20 @@ public class VariantAnnotatorEngine { private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map infoAnnotations) { for ( Map.Entry, String> dbSet : dbAnnotations.entrySet() ) { if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) { - String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType()); + final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType()); + + // put the DB key into the INFO field infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null); - // annotate dbsnp id if available and not already there - if ( rsID != null && vc.emptyID() ) - vc = new VariantContextBuilder(vc).id(rsID).make(); + + // add the ID if appropriate + if ( rsID != null ) { + if ( vc.emptyID() ) { + vc = new VariantContextBuilder(vc).id(rsID).make(); + } else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) { + final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID; + vc = new VariantContextBuilder(vc).id(newRsID).make(); + } + } } else { boolean overlapsComp = false; for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java index 7200f841b..1331ad5df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java @@ -8,9 +8,9 @@ import java.util.List; public interface AnnotatorCompatibleWalker { // getter methods for various used bindings - public abstract RodBinding getVariantRodBinding(); public abstract RodBinding getSnpEffRodBinding(); public abstract RodBinding getDbsnpRodBinding(); public abstract List> getCompRodBindings(); public abstract List> getResourceRodBindings(); + public abstract boolean alwaysAppendDbsnpId(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 369c2d0c6..b1495ac7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -126,10 +126,10 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; } - public RodBinding getVariantRodBinding() { return null; } public RodBinding getSnpEffRodBinding() { return null; } public List> getCompRodBindings() { return Collections.emptyList(); } public List> getResourceRodBindings() { return Collections.emptyList(); } + public boolean alwaysAppendDbsnpId() { return false; } /** * A raw, unfiltered, highly specific callset in VCF format. @@ -205,6 +205,12 @@ public class UnifiedGenotyper extends LocusWalker samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index ee5aed3e5..ba4b22445 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -54,8 +54,9 @@ public class UnifiedGenotyperEngine { EMIT_VARIANTS_ONLY, /** produces calls at variant sites and confident reference sites */ EMIT_ALL_CONFIDENT_SITES, - /** produces calls at any callable site regardless of confidence; this argument is intended for point - * mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */ + /** produces calls at any callable site regardless of confidence; this argument is intended only for point + * mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by + * no means produce a comprehensive set of indels in DISCOVERY mode */ EMIT_ALL_SITES } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index bb822f2ed..97166833b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import org.broad.tribble.TribbleException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Arrays; @@ -149,7 +150,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF count = Integer.valueOf(numberStr); } - type = VCFHeaderLineType.valueOf(mapping.get("Type")); + try { + type = VCFHeaderLineType.valueOf(mapping.get("Type")); + } catch (Exception e) { + throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)"); + } if (type == VCFHeaderLineType.Flag && !allowFlagValues()) throw new IllegalArgumentException("Flag is an unsupported type for this kind of field"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 14f7457b8..0d9d9bcd8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -110,6 +110,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("getting DB tag with dbSNP", spec); } + @Test + public void testMultipleIdsWithDbsnp() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3withIDs.vcf -L " + validationDataLocation + "vcfexample3withIDs.vcf", 1, + Arrays.asList("cd7e3d43b8f5579c461b3e588a295fa8")); + executeTest("adding multiple IDs with dbSNP", spec); + } + @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec(