From 9818c69df67fe5c930df4b62d895b5b9b15fac07 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 25 Jan 2012 09:32:52 -0500 Subject: [PATCH 1/8] Can now specify active regions to process at the command line, mainly for debugging purposes --- .../traversals/TraverseActiveRegions.java | 4 +-- .../gatk/walkers/ActiveRegionWalker.java | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index ebfcc0c29..cf15cc92b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -92,7 +92,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends Walker { + @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) + protected PrintStream activeRegionOutStream = null; + + @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false) + protected List> activeRegionBindings = null; + + public GenomeLocSortedSet presetActiveRegions = null; + + @Override + public void initialize() { + if( activeRegionBindings == null ) { return; } + List allIntervals = new ArrayList(0); + for ( IntervalBinding intervalBinding : activeRegionBindings ) { + List intervals = intervalBinding.getIntervals(this.getToolkit()); + + if ( intervals.isEmpty() ) { + logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed."); + } + + allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, IntervalSetRule.UNION); + } + + presetActiveRegions = IntervalUtils.sortAndMergeIntervals(this.getToolkit().getGenomeLocParser(), allIntervals, IntervalMergingRule.ALL); + } + // Do we actually want to operate on the context? public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { return true; // We are keeping all the reads From bbefe4a272f440262c717eef51750b4f6a138c37 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 25 Jan 2012 09:47:06 -0500 Subject: [PATCH 2/8] Added option to be able to write out the active regions to an interval list file --- .../sting/gatk/traversals/TraverseActiveRegions.java | 11 ++++++++++- .../sting/gatk/walkers/ActiveRegionWalker.java | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index cf15cc92b..f5e936a09 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -128,7 +128,16 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = integrateActiveList( isActiveList ); logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." ); - workQueue.addAll( activeRegions ); + if( walker.activeRegionOutStream == null ) { + workQueue.addAll( activeRegions ); + } else { // Just want to output the active regions to a file, not actually process them + for( final ActiveRegion activeRegion : activeRegions ) { + if( activeRegion.isActive ) { + walker.activeRegionOutStream.println( activeRegion.getLocation() ); + } + } + } + // Since we've sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them if( !workQueue.isEmpty() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 508aebb5c..98308ee11 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -40,7 +40,7 @@ import java.util.List; public abstract class ActiveRegionWalker extends Walker { @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) - protected PrintStream activeRegionOutStream = null; + public PrintStream activeRegionOutStream = null; @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false) protected List> activeRegionBindings = null; From ea3d4d60f2f99e1211ecccbfafc25c4d43ef12b7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jan 2012 11:35:13 -0500 Subject: [PATCH 4/8] This annotation requires rods and should be annotated as such --- .../sting/gatk/walkers/annotator/MVLikelihoodRatio.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index b9e6a5b2b..889cc634c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -23,7 +24,7 @@ import java.util.*; * Time: 12:24 PM * To change this template use File | Settings | File Templates. */ -public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { +public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { private MendelianViolation mendelianViolation = null; private String motherId; From e349b4b14b2f66efd56119dc75014f7a5190ee90 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jan 2012 11:35:54 -0500 Subject: [PATCH 5/8] Allow appending with the dbSNP ID even if a (different) ID is already present for the variant rod. --- .../walkers/annotator/VariantAnnotator.java | 9 +++++++-- .../annotator/VariantAnnotatorEngine.java | 17 +++++++++++++---- .../interfaces/AnnotatorCompatibleWalker.java | 2 +- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../VariantAnnotatorIntegrationTest.java | 8 ++++++++ 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 69560c7cb..5312c4136 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -84,7 +83,6 @@ public class VariantAnnotator extends RodWalker implements Ann @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); - public RodBinding getVariantRodBinding() { return variantCollection.variants; } /** * The INFO field will be annotated with information on the most biologically-significant effect @@ -163,6 +161,13 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="list", shortName="ls", doc="List the available annotations and exit") protected Boolean LIST = false; + /** + * By default, the dbSNP ID is added only when the ID field in the variant VCF is empty. + */ + @Argument(fullName="alwaysAppendDbsnpId", shortName="alwaysAppendDbsnpId", doc="In conjunction with the dbSNP binding, append the dbSNP ID even when the variant VCF already has the ID field populated") + protected Boolean ALWAYS_APPEND_DBSNP_ID = false; + public boolean alwaysAppendDbsnpId() { return ALWAYS_APPEND_DBSNP_ID; } + @Hidden @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 98d2fe17b..90d0ad740 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -195,11 +195,20 @@ public class VariantAnnotatorEngine { private VariantContext annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map infoAnnotations) { for ( Map.Entry, String> dbSet : dbAnnotations.entrySet() ) { if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) { - String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType()); + final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), ref.getLocus()), vc.getType()); + + // put the DB key into the INFO field infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null); - // annotate dbsnp id if available and not already there - if ( rsID != null && vc.emptyID() ) - vc = new VariantContextBuilder(vc).id(rsID).make(); + + // add the ID if appropriate + if ( rsID != null ) { + if ( vc.emptyID() ) { + vc = new VariantContextBuilder(vc).id(rsID).make(); + } else if ( walker.alwaysAppendDbsnpId() && vc.getID().indexOf(rsID) == -1 ) { + final String newRsID = vc.getID() + VCFConstants.ID_FIELD_SEPARATOR + rsID; + vc = new VariantContextBuilder(vc).id(newRsID).make(); + } + } } else { boolean overlapsComp = false; for ( VariantContext comp : tracker.getValues(dbSet.getKey(), ref.getLocus()) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java index 7200f841b..1331ad5df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java @@ -8,9 +8,9 @@ import java.util.List; public interface AnnotatorCompatibleWalker { // getter methods for various used bindings - public abstract RodBinding getVariantRodBinding(); public abstract RodBinding getSnpEffRodBinding(); public abstract RodBinding getDbsnpRodBinding(); public abstract List> getCompRodBindings(); public abstract List> getResourceRodBindings(); + public abstract boolean alwaysAppendDbsnpId(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 369c2d0c6..5a269087c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -126,10 +126,10 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; } - public RodBinding getVariantRodBinding() { return null; } public RodBinding getSnpEffRodBinding() { return null; } public List> getCompRodBindings() { return Collections.emptyList(); } public List> getResourceRodBindings() { return Collections.emptyList(); } + public boolean alwaysAppendDbsnpId() { return false; } /** * A raw, unfiltered, highly specific callset in VCF format. diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 14f7457b8..0d9d9bcd8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -110,6 +110,14 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("getting DB tag with dbSNP", spec); } + @Test + public void testMultipleIdsWithDbsnp() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3withIDs.vcf -L " + validationDataLocation + "vcfexample3withIDs.vcf", 1, + Arrays.asList("cd7e3d43b8f5579c461b3e588a295fa8")); + executeTest("adding multiple IDs with dbSNP", spec); + } + @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( From fb863dc6a70ca1b23d8e5c07ab1ad237c840e63a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jan 2012 11:50:12 -0500 Subject: [PATCH 6/8] Warn user when trying to run with EMIT_ALL_SITES with indels; better docs for that option. --- .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java | 6 ++++++ .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 5 +++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 5a269087c..5f84f62ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -205,6 +205,12 @@ public class UnifiedGenotyper extends LocusWalker samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index ee5aed3e5..ba4b22445 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -54,8 +54,9 @@ public class UnifiedGenotyperEngine { EMIT_VARIANTS_ONLY, /** produces calls at variant sites and confident reference sites */ EMIT_ALL_CONFIDENT_SITES, - /** produces calls at any callable site regardless of confidence; this argument is intended for point - * mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */ + /** produces calls at any callable site regardless of confidence; this argument is intended only for point + * mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by + * no means produce a comprehensive set of indels in DISCOVERY mode */ EMIT_ALL_SITES } From 96b62daff39a21d478db8d2e443c13e27b863792 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jan 2012 11:55:33 -0500 Subject: [PATCH 7/8] Minor tweak to the warning message. --- .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 5f84f62ec..b1495ac7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -209,7 +209,7 @@ public class UnifiedGenotyper extends LocusWalker samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); From 2799a1b686763543b95e7815809aa898cc2de101 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 25 Jan 2012 12:15:51 -0500 Subject: [PATCH 8/8] Catch exception for bad type and throw as a TribbleException --- .../sting/utils/codecs/vcf/VCFCompoundHeaderLine.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java index bb822f2ed..97166833b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCompoundHeaderLine.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; +import org.broad.tribble.TribbleException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Arrays; @@ -149,7 +150,11 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF count = Integer.valueOf(numberStr); } - type = VCFHeaderLineType.valueOf(mapping.get("Type")); + try { + type = VCFHeaderLineType.valueOf(mapping.get("Type")); + } catch (Exception e) { + throw new TribbleException(mapping.get("Type") + " is not a valid type in the VCF specification (note that types are case-sensitive)"); + } if (type == VCFHeaderLineType.Flag && !allowFlagValues()) throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");