From 7f8e6a97eea99deec5f1e2bf745717d077f22438 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 10:47:11 -0400 Subject: [PATCH 001/186] InfoFieldAnnotation now an abstract class extended by annotations so doc system works --- .../sting/gatk/walkers/annotator/AlleleBalance.java | 2 +- .../gatk/walkers/annotator/AnnotationByDepth.java | 2 +- .../sting/gatk/walkers/annotator/BaseCounts.java | 2 +- .../sting/gatk/walkers/annotator/ChromosomeCounts.java | 2 +- .../sting/gatk/walkers/annotator/DepthOfCoverage.java | 2 +- .../sting/gatk/walkers/annotator/FisherStrand.java | 2 +- .../sting/gatk/walkers/annotator/GCContent.java | 2 +- .../sting/gatk/walkers/annotator/GLstats.java | 2 +- .../sting/gatk/walkers/annotator/HaplotypeScore.java | 2 +- .../sting/gatk/walkers/annotator/HardyWeinberg.java | 2 +- .../sting/gatk/walkers/annotator/HomopolymerRun.java | 2 +- .../sting/gatk/walkers/annotator/IndelType.java | 2 +- .../sting/gatk/walkers/annotator/LowMQ.java | 2 +- .../gatk/walkers/annotator/MappingQualityZero.java | 2 +- .../walkers/annotator/MappingQualityZeroFraction.java | 2 +- .../sting/gatk/walkers/annotator/NBaseCount.java | 2 +- .../sting/gatk/walkers/annotator/QualByDepth.java | 2 +- .../gatk/walkers/annotator/RMSMappingQuality.java | 2 +- .../sting/gatk/walkers/annotator/RankSumTest.java | 2 +- .../sting/gatk/walkers/annotator/SampleList.java | 2 +- .../gatk/walkers/annotator/SpanningDeletions.java | 2 +- .../gatk/walkers/annotator/TechnologyComposition.java | 2 +- .../annotator/genomicannotator/GenomicAnnotation.java | 2 +- .../annotator/interfaces/InfoFieldAnnotation.java | 10 ++++++---- .../sting/utils/help/DocumentedGATKFeature.java | 2 +- .../broadinstitute/sting/utils/help/GATKDoclet.java | 6 +++++- 26 files changed, 35 insertions(+), 29 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 3144098a8..784927ab4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -42,7 +42,7 @@ import java.util.List; import java.util.Map; -public class AlleleBalance implements InfoFieldAnnotation { +public class AlleleBalance extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index 6c14e7445..dc41dbc81 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -8,7 +8,7 @@ import java.util.Map; -public abstract class AnnotationByDepth implements InfoFieldAnnotation { +public abstract class AnnotationByDepth extends InfoFieldAnnotation { protected int annotationByVariantDepth(final Map genotypes, Map stratifiedContexts) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 66416ce11..7cd159c5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -46,7 +46,7 @@ import java.util.List; import java.util.Map; -public class BaseCounts implements InfoFieldAnnotation { +public class BaseCounts extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 74f7f9d80..9b30079d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -43,7 +43,7 @@ import java.util.List; import java.util.Map; -public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { +public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index c384e0d09..d8907c57f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation { +public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 97ed221e7..e71febece 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -42,7 +42,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation { +public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 48677bbe5..588d3e98a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation { +public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { double content = computeGCContent(ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java index cca0ad4bc..862e12f7d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java @@ -23,7 +23,7 @@ import java.util.Map; */ // A set of annotations calculated directly from the GLs -public class GLstats implements InfoFieldAnnotation, StandardAnnotation { +public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index b175579f1..2196de389 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -48,7 +48,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { +public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index d86728d5e..2d9424e98 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -18,7 +18,7 @@ import java.util.List; import java.util.Map; -public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation { +public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { private static final int MIN_SAMPLES = 10; private static final int MIN_GENOTYPE_QUALITY = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 02efd854c..870e9992b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class HomopolymerRun implements InfoFieldAnnotation, StandardAnnotation { +public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { private boolean ANNOTATE_INDELS = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index 2fd62ddf3..b1c16ba0d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -19,7 +19,7 @@ import java.util.*; * Time: 11:47:33 AM * To change this template use File | Settings | File Templates. */ -public class IndelType implements InfoFieldAnnotation, ExperimentalAnnotation { +public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 1d999c531..5de9aaa3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class LowMQ implements InfoFieldAnnotation { +public class LowMQ extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index f240d02bc..60bfe945f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -18,7 +18,7 @@ import java.util.List; import java.util.Map; -public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotation { +public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 08a25a7e3..3a6c9dce9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -18,7 +18,7 @@ import java.util.Map; -public class MappingQualityZeroFraction implements InfoFieldAnnotation, ExperimentalAnnotation { +public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 1c70a1b33..9f67acf65 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -21,7 +21,7 @@ import java.util.Map; * Date: 5/16/11 */ -public class NBaseCount implements InfoFieldAnnotation { +public class NBaseCount extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 2175d39e6..20bee9008 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class QualByDepth extends AnnotationByDepth implements InfoFieldAnnotation, StandardAnnotation { +public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index d52f07b58..d1d9871e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -20,7 +20,7 @@ import java.util.List; import java.util.Map; -public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotation { +public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 5466828f6..643056c1d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -21,7 +21,7 @@ import java.util.Map; -public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnotation { +public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index ff9092a71..3712ca8ae 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -41,7 +41,7 @@ import java.util.List; import java.util.Map; -public class SampleList implements InfoFieldAnnotation { +public class SampleList extends InfoFieldAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( vc.isMonomorphic() || !vc.hasGenotypes() ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index a4668eeb6..332b0226b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -16,7 +16,7 @@ import java.util.List; import java.util.Map; -public class SpanningDeletions implements InfoFieldAnnotation, StandardAnnotation { +public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index b46d82d8b..626142cd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -24,7 +24,7 @@ import java.util.Map; * Time: 3:14 PM * To change this template use File | Settings | File Templates. */ -public class TechnologyComposition implements ExperimentalAnnotation,InfoFieldAnnotation { +public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { private String nSLX = "NumSLX"; private String n454 ="Num454"; private String nSolid = "NumSOLiD"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java index 05c1b3c52..0e8360484 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java @@ -48,7 +48,7 @@ import java.util.Map.Entry; * * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator */ -public class GenomicAnnotation implements InfoFieldAnnotation { +public class GenomicAnnotation extends InfoFieldAnnotation { public static final String CHR_COLUMN = "chr"; public static final String START_COLUMN = "start"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 4e850d01b..ebce538ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -4,20 +4,22 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; import java.util.Map; -public interface InfoFieldAnnotation { +@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator INFO-field annotations", summary = "VariantAnnotator annotations, written to INFO Field") +public abstract class InfoFieldAnnotation { // return annotations for the given contexts split by sample - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); + public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); // return the INFO keys - public List getKeyNames(); + public abstract List getKeyNames(); // return the descriptions used for the VCF INFO meta field - public List getDescriptions(); + public abstract List getDescriptions(); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java index 710503ca8..acc64a024 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java @@ -36,7 +36,7 @@ import java.lang.annotation.*; @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.TYPE) public @interface DocumentedGATKFeature { - public boolean enable() default true; + public boolean enable() default false; public String groupName(); public String summary() default ""; public Class handler() default GenericDocumentationHandler.class; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 0b4c69e3c..49214237a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -57,7 +57,7 @@ public class GATKDoclet { * @throws java.io.IOException if output can't be written. */ public static boolean start(RootDoc rootDoc) throws IOException { - logger.setLevel(Level.INFO); + logger.setLevel(Level.DEBUG); // load arguments for(String[] options: rootDoc.options()) { if(options[0].equals("-build-timestamp")) @@ -95,6 +95,10 @@ public class GATKDoclet { for ( ClassDoc doc : rootDoc.classes() ) { logger.debug("Considering " + doc); Class clazz = getClassForClassDoc(doc); + + if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance")) + logger.debug("foo"); + DocumentedGATKFeature feature = getFeatureForClassDoc(doc); DocumentedGATKFeatureHandler handler = createHandler(doc, feature); if ( handler != null && handler.shouldBeProcessed(doc) ) { From 1a268ff1fd97108dd6394b54536fe537fab60dc0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 10:55:09 -0400 Subject: [PATCH 002/186] Refactor so that GenotypeAnnotation and InfoFieldAnnotation share common superclass VariantAnnotatorAnnotation --- .../annotator/AlleleBalanceBySample.java | 2 +- .../annotator/DepthPerAlleleBySample.java | 2 +- .../annotator/MappingQualityZeroBySample.java | 2 +- .../ReadDepthAndAllelicFractionBySample.java | 2 +- .../interfaces/GenotypeAnnotation.java | 11 ++--- .../interfaces/InfoFieldAnnotation.java | 9 +--- .../VariantAnnotatorAnnotation.java | 41 +++++++++++++++++++ 7 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index a99f87a70..f70a87dc5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -15,7 +15,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation { +public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { Double ratio = annotateSNP(stratifiedContext, vc, g); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index e3e8bc258..20513421d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -22,7 +22,7 @@ import java.util.List; import java.util.Map; -public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation { +public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index 0ca53adf2..f2b7b72b9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -49,7 +49,7 @@ import java.util.Map; * Time: 6:46:25 PM * To change this template use File | Settings | File Templates. */ -public class MappingQualityZeroBySample implements GenotypeAnnotation { +public class MappingQualityZeroBySample extends GenotypeAnnotation { public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index c56e2622d..f3e99235a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -52,7 +52,7 @@ import java.util.Map; * Time: 3:59:27 PM * To change this template use File | Settings | File Templates. */ -public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation { +public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { private static String REF_ALLELE = "REF"; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 57bc44ab8..e982582ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -10,15 +10,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; import java.util.Map; -public interface GenotypeAnnotation { +public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts/genotype split by sample - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); - - // return the FORMAT keys - public List getKeyNames(); + public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); // return the descriptions used for the VCF FORMAT meta field - public List getDescriptions(); - + public abstract List getDescriptions(); + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index ebce538ed..84438ccd8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -10,16 +11,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; import java.util.Map; -@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator INFO-field annotations", summary = "VariantAnnotator annotations, written to INFO Field") -public abstract class InfoFieldAnnotation { - +public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts split by sample public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); - // return the INFO keys - public abstract List getKeyNames(); - // return the descriptions used for the VCF INFO meta field public abstract List getDescriptions(); - } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java new file mode 100644 index 000000000..f33d61df9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.List; +import java.util.Map; + +@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") +public abstract class VariantAnnotatorAnnotation { + // return the INFO keys + public abstract List getKeyNames(); +} \ No newline at end of file From 0b43ee117cd06a138309a3d18be3b3809be87c9f Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Mon, 25 Jul 2011 11:35:34 -0400 Subject: [PATCH 003/186] Added the required=false tag to the -noST and -noEV arguments so the auto-help output doesn't look weird (i.e. listing arguments as required when their value has already been specified by default). --- .../sting/gatk/walkers/varianteval/VariantEvalWalker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index fe3173506..3867aa958 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -67,7 +67,7 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false) protected String[] STRATIFICATIONS_TO_USE = {}; - @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)") + @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false) protected Boolean NO_STANDARD_STRATIFICATIONS = false; @Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false) @@ -77,7 +77,7 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="evalModule", shortName="EV", doc="One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless -noE is specified)", required=false) protected String[] MODULES_TO_USE = {}; - @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)") + @Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)", required=false) protected Boolean NO_STANDARD_MODULES = false; // Other arguments From acda8eb09cae11a7f16b1eee48977ec44515b369 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 12:43:27 -0400 Subject: [PATCH 006/186] Commented out test that causes new CommandLineGATK() to fail --- .../utils/help/DocumentedGATKFeature.java | 2 +- .../sting/gatk/WalkerManagerUnitTest.java | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java index acc64a024..710503ca8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java @@ -36,7 +36,7 @@ import java.lang.annotation.*; @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.TYPE) public @interface DocumentedGATKFeature { - public boolean enable() default false; + public boolean enable() default true; public String groupName(); public String summary() default ""; public Class handler() default GenericDocumentationHandler.class; diff --git a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java index cd43927a4..357872dbd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java @@ -57,17 +57,17 @@ public class WalkerManagerUnitTest { walkerManager.createByName("Missing"); } - @Test(expectedExceptions=DynamicClassResolutionException.class) - public void testUninstantiableWalker() { - walkerManager.createByName("Uninstantiable"); - } +// @Test(expectedExceptions=DynamicClassResolutionException.class) +// public void testUninstantiableWalker() { +// walkerManager.createByName("Uninstantiable"); +// } } -@Hidden -@Requires(value={}) -class UninstantiableWalker extends Walker { - // Private constructor will generate uninstantiable message - private UninstantiableWalker() {} - public Long reduceInit() { return 0L; } - public Long reduce(Integer value, Long accum) { return 0L; } -} +//@Hidden +//@Requires(value={}) +//class UninstantiableWalker extends Walker { +// // Private constructor will generate uninstantiable message +// private UninstantiableWalker() {} +// public Long reduceInit() { return 0L; } +// public Long reduce(Integer value, Long accum) { return 0L; } +//} From 44bd9ae70363c0f9a15632b3e5ffad2e87126d99 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 12:53:06 -0400 Subject: [PATCH 007/186] Restoring UninstantiableWalker, as it is not going to be possible to run ant test; ant gatkdocs without ant clean in between --- .../sting/gatk/WalkerManagerUnitTest.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java index 357872dbd..cd43927a4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java @@ -57,17 +57,17 @@ public class WalkerManagerUnitTest { walkerManager.createByName("Missing"); } -// @Test(expectedExceptions=DynamicClassResolutionException.class) -// public void testUninstantiableWalker() { -// walkerManager.createByName("Uninstantiable"); -// } + @Test(expectedExceptions=DynamicClassResolutionException.class) + public void testUninstantiableWalker() { + walkerManager.createByName("Uninstantiable"); + } } -//@Hidden -//@Requires(value={}) -//class UninstantiableWalker extends Walker { -// // Private constructor will generate uninstantiable message -// private UninstantiableWalker() {} -// public Long reduceInit() { return 0L; } -// public Long reduce(Integer value, Long accum) { return 0L; } -//} +@Hidden +@Requires(value={}) +class UninstantiableWalker extends Walker { + // Private constructor will generate uninstantiable message + private UninstantiableWalker() {} + public Long reduceInit() { return 0L; } + public Long reduce(Integer value, Long accum) { return 0L; } +} From 2ac490dbdf6710401566ab6930dd9169f559f8f7 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Mon, 25 Jul 2011 13:20:00 -0400 Subject: [PATCH 009/186] Fix improper detection of command-line arguments with missing values. --- .../commandline/ArgumentDefinitions.java | 3 +- .../sting/commandline/ArgumentMatch.java | 2 +- .../sting/commandline/ParsingEngine.java | 39 +++++++++++++++---- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java index 9f92df6e0..8e3f753a8 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinitions.java @@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable { static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() { public boolean matches( ArgumentDefinition definition, Object key ) { - return definition.validation != null; + // We can perform some sort of validation for anything that isn't a flag. + return !definition.isFlag; } }; } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java index 60ed8c899..351583c07 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentMatch.java @@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable { public final String label; /** - * Maps indicies of command line arguments to values paired with that argument. + * Maps indices of command line arguments to values paired with that argument. */ public final SortedMap> indices = new TreeMap>(); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 8423bb2f2..279aed396 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -41,6 +41,11 @@ import java.util.*; * A parser for Sting command-line arguments. */ public class ParsingEngine { + /** + * The loaded argument sources along with their back definitions. + */ + private Map argumentSourcesByDefinition = new HashMap(); + /** * A list of defined arguments against which command lines are matched. * Package protected for testing access. @@ -107,8 +112,13 @@ public class ParsingEngine { */ public void addArgumentSource( String sourceName, Class sourceClass ) { List argumentsFromSource = new ArrayList(); - for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) - argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() ); + for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) { + List argumentDefinitions = argumentSource.createArgumentDefinitions(); + for(ArgumentDefinition argumentDefinition: argumentDefinitions) { + argumentSourcesByDefinition.put(argumentDefinition,argumentSource); + argumentsFromSource.add( argumentDefinition ); + } + } argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) ); } @@ -199,16 +209,25 @@ public class ParsingEngine { throw new InvalidArgumentException( invalidArguments ); } - // Find invalid argument values (arguments that fail the regexp test. + // Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression. if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) { Collection verifiableArguments = argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher ); Collection> invalidValues = new ArrayList>(); for( ArgumentDefinition verifiableArgument: verifiableArguments ) { ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument ); + // Check to see whether an argument value was specified. Argument values must be provided + // when the argument name is specified and the argument is not a flag type. + for(ArgumentMatch verifiableMatch: verifiableMatches) { + ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument); + if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault()) + invalidValues.add(new Pair(verifiableArgument,null)); + } + + // Ensure that the field contents meet the validation criteria specified by the regular expression. for( ArgumentMatch verifiableMatch: verifiableMatches ) { for( String value: verifiableMatch.values() ) { - if( !value.matches(verifiableArgument.validation) ) + if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) ) invalidValues.add( new Pair(verifiableArgument, value) ); } } @@ -515,10 +534,14 @@ class InvalidArgumentValueException extends ArgumentException { private static String formatArguments( Collection> invalidArgumentValues ) { StringBuilder sb = new StringBuilder(); for( Pair invalidValue: invalidArgumentValues ) { - sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", - invalidValue.first.fullName, - invalidValue.second, - invalidValue.first.validation) ); + if(invalidValue.getSecond() == null) + sb.append( String.format("%nArgument '--%s' requires a value but none was provided", + invalidValue.first.fullName) ); + else + sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", + invalidValue.first.fullName, + invalidValue.second, + invalidValue.first.validation) ); } return sb.toString(); } From f3049fba63899bfb35f245198b829472f480c9fa Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 13:21:52 -0400 Subject: [PATCH 010/186] refdata directory cleanup Removing unused files RODRecordIterator, ReferenceOrderedData, QueryableTrack, RMDTrackCreationException, GATKFeatureIterator, ReferenceOrderedDataUnitTest Refactored dbSNP and refseq utilities to be closer to the other files implementing these features --- .../sting/gatk/GenomeAnalysisEngine.java | 2 +- .../rmd/ReferenceOrderedDataPool.java | 2 +- .../rmd/ReferenceOrderedDataSource.java | 2 +- .../sting/gatk/refdata/RODRecordIterator.java | 238 ------------------ .../gatk/refdata/ReferenceOrderedData.java | 130 ---------- .../gatk/refdata/VariantContextAdaptors.java | 2 +- .../helpers => features}/DbSNPHelper.java | 2 +- .../features/refseq/RefSeqFeature.java | 1 - .../{ => features/refseq}/Transcript.java | 2 +- .../gatk/refdata/indexer/RMDIndexer.java | 2 +- .../gatk/refdata/tracks/QueryableTrack.java | 45 ---- .../{builders => }/RMDTrackBuilder.java | 8 +- .../tracks/RMDTrackCreationException.java | 45 ---- .../refdata/utils/GATKFeatureIterator.java | 65 ----- .../sting/gatk/walkers/PileupWalker.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 2 +- .../coverage/DepthOfCoverageWalker.java | 2 +- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../indels/SomaticIndelDetectorWalker.java | 4 +- .../TableRecalibrationWalker.java | 2 +- .../varianteval/VariantEvalWalker.java | 2 +- .../variantutils/ValidateVariants.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 4 +- .../gatk/GATKExtensionsGenerator.java | 2 +- .../queue/extensions/gatk/RodBindField.java | 2 +- .../interval/IntervalFileMergingIterator.java | 1 - .../StringToGenomeLocIteratorAdapter.java | 2 +- .../sting/utils/text/ListFileUtils.java | 2 +- .../ReferenceOrderedViewUnitTest.java | 2 +- .../rmd/ReferenceOrderedDataPoolUnitTest.java | 2 +- .../refdata/ReferenceOrderedDataUnitTest.java | 48 ---- .../RMDTrackBuilderUnitTest.java | 3 +- 32 files changed, 28 insertions(+), 604 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java rename public/java/src/org/broadinstitute/sting/gatk/refdata/{utils/helpers => features}/DbSNPHelper.java (99%) rename public/java/src/org/broadinstitute/sting/gatk/refdata/{ => features/refseq}/Transcript.java (95%) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java rename public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/{builders => }/RMDTrackBuilder.java (98%) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java rename public/java/src/org/broadinstitute/sting/{gatk/refdata/utils => utils/interval}/StringToGenomeLocIteratorAdapter.java (95%) delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java rename public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/{builders => }/RMDTrackBuilderUnitTest.java (98%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 918bc1251..a414d24aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java index abd5929eb..9d5a54f58 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPool.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd; import net.sf.samtools.SAMSequenceDictionary; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java index 60b68bda5..6992fc1ff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java @@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java deleted file mode 100644 index ce924fd87..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RODRecordIterator.java +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.FileNotFoundException; -import java.lang.reflect.Constructor; -import java.util.Iterator; -import java.util.regex.Pattern; - -/** - * This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data - * files. The only assumption made is that every line in the file provides a complete and separate data record. The records - * can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given - * position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is - * otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be - * provided that is capable of parsing itself from a single line of data. This implementation will be used, - * through reflection mechanism, as a callback to do all the work. - * - * The model is, hence, as follows: - * - * String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills - * an instance of RodImplementation with data values from dataRecord line). - * - * - * instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator - * that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object - * representing a single line (record) of data. The returned object will be initialized with "track name" trackName - - * track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between - * multiple streams of (possibly heterogeneous) annotation data bound to an application. - * - * This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back - * to the ROD implementation class for processing. - * - * This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate, - * and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level - * iterators/wrappers will perform all the necessary checks. - * - * Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly) - * mechanism is as follows: - * 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line - * and return the header object (to be kept by the iterator) - * 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it - * and - * 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize() - * method is called, the iterator will re-open the file and start reading it from the very beginning; there is no - * other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data"). - * - * Created by IntelliJ IDEA. - * User: asivache - * Date: Sep 10, 2009 - * Time: 1:22:23 PM - * To change this template use File | Settings | File Templates. - */ -public class RODRecordIterator implements Iterator { - - private PushbackIterator reader; - - // stores name of the track this iterator reads (will be also returned by getName() of ROD objects - // generated by this iterator) - private String name; - - // we keep the file object, only to use file name in error reports - private File file; - - // rod type; this is what we will instantiate for RODs at runtime - private Class type; - - private Object header = null; // Some RODs may use header - - // field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that! - private String fieldDelimiter; - - // constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required. - private Constructor named_constructor; - - // keep track of the lines we are reading. used for error messages only. - private long linenum = 0; - - private boolean allow_empty = true; - private boolean allow_comments = true; - public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$"); - - public RODRecordIterator(File file, String name, Class type) { - try { - reader = new PushbackIterator(new XReadLines(file)); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(file, e); - } - this.file = file; - this.name = name; - this.type = type; - try { - named_constructor = type.getConstructor(String.class); - } - catch (java.lang.NoSuchMethodException e) { - throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)"); - } - ROD rod = instantiateROD(name); - fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself - try { - header = rod.initialize(file); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file); - } - - } - - - /** - * Returns true if the iteration has more elements. (In other - * words, returns true if next would return an element - * rather than throwing an exception.) - * - * @return true if the iterator has more elements. - */ - public boolean hasNext() { - if ( allow_empty || allow_comments ) { - while ( reader.hasNext() ) { - String line = reader.next(); - if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line - if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines - // the line is not empty and not a comment line, so we have next after all - reader.pushback(line); - return true; - } - return false; // oops, we end up here if there's nothing left - } else { - return reader.hasNext(); - } - } - - /** - * Returns the next valid ROD record in the file, skipping empty and comment lines. - * - * @return the next element in the iteration. - * @throws java.util.NoSuchElementException - * iteration has no more elements. - */ - public ROD next() { - ROD n = null; - boolean parsed_ok = false; - String line ; - - while ( ! parsed_ok && reader.hasNext() ) { - line = reader.next(); - linenum++; - while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() || - allow_comments && line.charAt(0) == '#' ) { - if ( reader.hasNext() ) { - line = reader.next(); - linenum++; - } else { - line = null; - break; - } - } - - if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done - - String parts[] = line.split(fieldDelimiter); - - try { - n = instantiateROD(name); - parsed_ok = n.parseLine(header,parts) ; - } - catch ( Exception e ) { - throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+ - "\nOffending line: "+line+ - "\nReason ("+e.getClass().getName()+")", e); - } - } - - - return n; - } - - /** - * Removes from the underlying collection the last element returned by the - * iterator (optional operation). This method can be called only once per - * call to next. The behavior of an iterator is unspecified if - * the underlying collection is modified while the iteration is in - * progress in any way other than by calling this method. - * - * @throws UnsupportedOperationException if the remove - * operation is not supported by this Iterator. - * @throws IllegalStateException if the next method has not - * yet been called, or the remove method has already - * been called after the last call to the next - * method. - */ - public void remove() { - throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator"); - } - - /** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name - * of the ROD track. - * @param name - * @return - */ - private ROD instantiateROD(final String name) { - try { - return (ROD) named_constructor.newInstance(name); - } catch (Exception e) { - throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e); - } - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java deleted file mode 100644 index 5cdb6e9f7..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ /dev/null @@ -1,130 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.*; -import java.lang.reflect.Method; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -/** - * Class for representing arbitrary reference ordered data sets - *

- * User: mdepristo - * Date: Feb 27, 2009 - * Time: 10:47:14 AM - * To change this template use File | Settings | File Templates. - */ -public class ReferenceOrderedData implements Iterable { - private String name; - private File file = null; -// private String fieldDelimiter; - - /** Header object returned from the datum */ -// private Object header = null; - - private Class type = null; // runtime type information for object construction - - /** our log, which we want to capture anything from this class */ - private static Logger logger = Logger.getLogger(ReferenceOrderedData.class); - - /** - * given an existing file, open it and append all the valid triplet lines to an existing list - * - * @param rodTripletList the list of existing triplets - * @param filename the file to attempt to extract ROD triplets from - */ - protected static void extractRodsFromFile(List rodTripletList, String filename) { - BufferedReader str; - try { - str = new BufferedReader(new FileReader(new File(filename))); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to load the ROD input file", e); - } - String line = "NO LINES READ IN"; - try { - while ((line = str.readLine()) != null) { - if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim()); - else logger.warn("the following file line didn't parsing into a triplet -> " + line); - } - } catch (IOException e) { - throw new UserException.CouldNotReadInputFile(new File(filename), "Failed reading the input rod file; last line read was " + line, e); - } - } - - - // ---------------------------------------------------------------------- - // - // Constructors - // - // ---------------------------------------------------------------------- - public ReferenceOrderedData(final String name, File file, Class type ) { - this.name = name; - this.file = file; - this.type = type; -// this.header = initializeROD(name, file, type); -// this.fieldDelimiter = newROD(name, type).delimiterRegex(); - } - - public String getName() { return name; } - - public File getFile() { return file; } - - public Class getType() { return type; } - - /** - * Special equals override to see if this ROD is compatible with the given - * name and type. 'Compatible' means that this ROD has the name that's passed - * in and its data can fit into the container specified by type. - * - * @param name Name to check. - * @param type Type to check. - * - * @return True if these parameters imply this rod. False otherwise. - */ - public boolean matches(String name, Class type) { - return this.name.equals(name) && type.isAssignableFrom(this.type); - } - - public Iterator iterator() { - Iterator it; - try { - Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class); - it = (Iterator) m.invoke(null, name, file); - } catch (java.lang.NoSuchMethodException e) { - it = new RODRecordIterator(file,name,type); - } catch (java.lang.NullPointerException e) { - throw new RuntimeException(e); - } catch (java.lang.SecurityException e) { - throw new RuntimeException(e); - } catch (java.lang.IllegalAccessException e) { - throw new RuntimeException(e); - } catch (java.lang.IllegalArgumentException e) { - throw new RuntimeException(e); - } catch (java.lang.reflect.InvocationTargetException e) { - throw new RuntimeException(e); - } - // return new RODIterator(it); - return it; - } - - // ---------------------------------------------------------------------- - // - // Manipulations of all of the data - // - // ---------------------------------------------------------------------- - - public static void write(ArrayList data, File output) throws IOException { - final FileWriter out = new FileWriter(output); - - for (ReferenceOrderedDatum rec : data) { - out.write(rec.repl() + "\n"); - } - - out.close(); - } - - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 1d622e2c7..8ccd9081b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -4,7 +4,7 @@ import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java rename to public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java index 3201769e0..b847fa2e6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata.utils.helpers; +package org.broadinstitute.sting.gatk.refdata.features; import net.sf.samtools.util.SequenceUtil; import org.broad.tribble.annotation.Strand; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java index d12114f9a..a38d45428 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.refdata.features.refseq; import org.broad.tribble.Feature; -import org.broadinstitute.sting.gatk.refdata.Transcript; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java rename to public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java index b8a0868dd..d8bf12810 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/Transcript.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata; +package org.broadinstitute.sting.gatk.refdata.features.refseq; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 085d6b5b3..85374757d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -12,7 +12,7 @@ import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java deleted file mode 100644 index 731df997d..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/QueryableTrack.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.tracks; - -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.io.IOException; - -/** - * @author aaron - *

- * Interface QueryableTrack - *

- * a decorator interface for tracks that are queryable - */ -public interface QueryableTrack { - public CloseableIterator query(final GenomeLoc interval) throws IOException; - public CloseableIterator query(final GenomeLoc interval, final boolean contained) throws IOException; - public CloseableIterator query(final String contig, final int start, final int stop) throws IOException; - public CloseableIterator query(final String contig, final int start, final int stop, final boolean contained) throws IOException; - public void close(); -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java rename to public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 19c91be1b..41e8cf15b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.tracks.builders; +package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; @@ -36,8 +36,6 @@ import org.broad.tribble.util.LittleEndianOutputStream; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -133,10 +131,8 @@ public class RMDTrackBuilder extends PluginManager { * @param fileDescriptor a description of the type of track to build. * * @return an instance of the track - * @throws RMDTrackCreationException - * if we don't know of the target class or we couldn't create it */ - public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException { + public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java deleted file mode 100644 index 29aefacc6..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackCreationException.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.tracks; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; - - -/** - * - * @author aaron - * - * Class RMDTrackCreationException - * - * if we fail for some reason to make a track, throw this exception - */ -public class RMDTrackCreationException extends ReviewedStingException { - public RMDTrackCreationException(String msg) { - super(msg); - } - - public RMDTrackCreationException(String message, Throwable throwable) { - super(message, throwable); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java deleted file mode 100644 index 17c9fa718..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeatureIterator.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2010. The Broad Institute - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.utils; - -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; - -import java.util.Iterator; - - -/** - * - * @author aaron - * - * Class GATKFeatureIterator - * - * Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam! - */ -public class GATKFeatureIterator implements CloseableIterator { - private final Iterator iter; - public GATKFeatureIterator(Iterator iter) { - this.iter = iter; - } - - @Override - public boolean hasNext() { - return iter.hasNext(); - } - - @Override - public GATKFeature next() { - return new GATKFeature.RODGATKFeature(iter.next()); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Remove not supported"); - } - - @Override - public void close() { - // do nothing, our underlying iterator doesn't support this - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 508d1f6ee..5db4fb417 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -32,8 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index fdf498a3d..f9b1563b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.GenomicAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.JoinTable; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index c1956f1d7..708d6bb94 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 2a0338bca..492991a14 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.SampleUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 443e6e9f2..78ad42ce1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -39,11 +39,11 @@ import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; -import org.broadinstitute.sting.gatk.refdata.Transcript; +import org.broadinstitute.sting.gatk.refdata.features.refseq.Transcript; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.ReadFilters; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index fec7ee4e6..a34719b18 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index fe3173506..8766ea4f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.Reference; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 0644c669b..044fc6533 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index aa0e5987f..37fd0d547 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -35,9 +35,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 5095bd6e5..92e339aa1 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -41,7 +41,7 @@ import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.Walker; diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java index ea180d33c..02d2fd0a8 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.queue.extensions.gatk; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.WalkerManager; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Walker; diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index 988240ef9..2bc3fa284 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; -import org.broadinstitute.sting.gatk.refdata.utils.StringToGenomeLocIteratorAdapter; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java b/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java rename to public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java index fc7f7c58f..659260345 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java @@ -23,7 +23,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.utils; +package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index f6aa882ad..4ab1c1685 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.utils.text; import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; -import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.File; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index b32473b9d..cb156b682 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -3,13 +3,13 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java index 5b0d67e88..bd4f93d24 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.datasources.rmd; import org.broadinstitute.sting.commandline.Tags; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java deleted file mode 100644 index fa20ea913..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.testng.Assert; -import org.broadinstitute.sting.BaseTest; - -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.List; - - -/** - * - * @author aaron - * - * Class ReferenceOrderedDataUnitTest - * - * some functionality to test parts of the reference ordered data system that I've added. This is by NO MEANS - * a complete test suite, but additions would be extremely welcome - */ -public class ReferenceOrderedDataUnitTest extends BaseTest { - @Test - public void extractRodsFromFileTest() { - String file = validationDataLocation + "testRODFileImpl.csv"; - List lst = new ArrayList(); - ReferenceOrderedData.extractRodsFromFile(lst,file); - Assert.assertEquals(lst.size(), 6); - int index = 0; - for (String entry: lst) { - String first = entry.subSequence(0,entry.indexOf(",")).toString(); - Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); - } - } - @Test - public void extractRodsFromMultiFileTest() { - String file = validationDataLocation + "testRODFileImpl.csv"; - String file2 = validationDataLocation + "testRODFileImpl2.csv"; - List lst = new ArrayList(); - ReferenceOrderedData.extractRodsFromFile(lst,file); - ReferenceOrderedData.extractRodsFromFile(lst,file2); - Assert.assertEquals(lst.size(), 12); - int index = 0; - for (String entry: lst) { - String first = entry.subSequence(0,entry.indexOf(",")).toString(); - Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); - } - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java similarity index 98% rename from public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java rename to public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index e475e732d..70d2e7a85 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/RMDTrackBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -21,13 +21,14 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.tracks.builders; +package org.broadinstitute.sting.gatk.refdata.tracks; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.Tribble; import org.broad.tribble.index.Index; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.exceptions.UserException; From ebfd8df06c4e6b9532c43b2a114364c0f7310bee Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 13:25:30 -0400 Subject: [PATCH 011/186] Restoring accidentially deleted unit test --- .../refdata/ReferenceOrderedDataUnitTest.java | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java new file mode 100644 index 000000000..fa20ea913 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java @@ -0,0 +1,48 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.testng.Assert; +import org.broadinstitute.sting.BaseTest; + +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + + +/** + * + * @author aaron + * + * Class ReferenceOrderedDataUnitTest + * + * some functionality to test parts of the reference ordered data system that I've added. This is by NO MEANS + * a complete test suite, but additions would be extremely welcome + */ +public class ReferenceOrderedDataUnitTest extends BaseTest { + @Test + public void extractRodsFromFileTest() { + String file = validationDataLocation + "testRODFileImpl.csv"; + List lst = new ArrayList(); + ReferenceOrderedData.extractRodsFromFile(lst,file); + Assert.assertEquals(lst.size(), 6); + int index = 0; + for (String entry: lst) { + String first = entry.subSequence(0,entry.indexOf(",")).toString(); + Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); + } + } + @Test + public void extractRodsFromMultiFileTest() { + String file = validationDataLocation + "testRODFileImpl.csv"; + String file2 = validationDataLocation + "testRODFileImpl2.csv"; + List lst = new ArrayList(); + ReferenceOrderedData.extractRodsFromFile(lst,file); + ReferenceOrderedData.extractRodsFromFile(lst,file2); + Assert.assertEquals(lst.size(), 12); + int index = 0; + for (String entry: lst) { + String first = entry.subSequence(0,entry.indexOf(",")).toString(); + Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); + } + } +} From 2a51543693691f75bd4145675f5df03cea0f8ff7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 13:27:42 -0400 Subject: [PATCH 012/186] Actually should have been gone... --- .../refdata/ReferenceOrderedDataUnitTest.java | 48 ------------------- 1 file changed, 48 deletions(-) delete mode 100644 public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java deleted file mode 100644 index fa20ea913..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataUnitTest.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.testng.Assert; -import org.broadinstitute.sting.BaseTest; - -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.List; - - -/** - * - * @author aaron - * - * Class ReferenceOrderedDataUnitTest - * - * some functionality to test parts of the reference ordered data system that I've added. This is by NO MEANS - * a complete test suite, but additions would be extremely welcome - */ -public class ReferenceOrderedDataUnitTest extends BaseTest { - @Test - public void extractRodsFromFileTest() { - String file = validationDataLocation + "testRODFileImpl.csv"; - List lst = new ArrayList(); - ReferenceOrderedData.extractRodsFromFile(lst,file); - Assert.assertEquals(lst.size(), 6); - int index = 0; - for (String entry: lst) { - String first = entry.subSequence(0,entry.indexOf(",")).toString(); - Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); - } - } - @Test - public void extractRodsFromMultiFileTest() { - String file = validationDataLocation + "testRODFileImpl.csv"; - String file2 = validationDataLocation + "testRODFileImpl2.csv"; - List lst = new ArrayList(); - ReferenceOrderedData.extractRodsFromFile(lst,file); - ReferenceOrderedData.extractRodsFromFile(lst,file2); - Assert.assertEquals(lst.size(), 12); - int index = 0; - for (String entry: lst) { - String first = entry.subSequence(0,entry.indexOf(",")).toString(); - Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); - } - } -} From a00e3023218a8034bb1e4f7d494e9e9942f83b48 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Mon, 25 Jul 2011 14:31:40 -0400 Subject: [PATCH 015/186] Fix formatting issue. --- build.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/build.xml b/build.xml index 874a003eb..295cd95e1 100644 --- a/build.xml +++ b/build.xml @@ -489,6 +489,7 @@ + From af0b5883c3cf9141d413e9f61b3eea21c12c1991 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Mon, 25 Jul 2011 14:36:12 -0400 Subject: [PATCH 016/186] In unstable, add a reference to DocumentedGATKFeature to vcf.jar, which is now a static dependency of UserExceptions. --- build.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/build.xml b/build.xml index fb485f0e5..81e24f58f 100644 --- a/build.xml +++ b/build.xml @@ -521,6 +521,7 @@ + From 3afcb3415d47d9450623ce810ccb1d338795dbad Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 25 Jul 2011 14:58:31 -0400 Subject: [PATCH 018/186] Max of 1000 records will be loaded and compared to avoid heap size problem. --- .../sting/gatk/walkers/diffengine/DiffEngine.java | 6 +++--- public/java/test/org/broadinstitute/sting/MD5DB.java | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 5f8f19892..4a4f6f6af 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -342,12 +342,12 @@ public class DiffEngine { return reader.readFromFile(file, maxElementsToRead); } - public static boolean simpleDiffFiles(File masterFile, File testFile, DiffEngine.SummaryReportParams params) { + public static boolean simpleDiffFiles(File masterFile, File testFile, int maxElementsToRead, DiffEngine.SummaryReportParams params) { DiffEngine diffEngine = new DiffEngine(); if ( diffEngine.canRead(masterFile) && diffEngine.canRead(testFile) ) { - DiffElement master = diffEngine.createDiffableFromFile(masterFile); - DiffElement test = diffEngine.createDiffableFromFile(testFile); + DiffElement master = diffEngine.createDiffableFromFile(masterFile, maxElementsToRead); + DiffElement test = diffEngine.createDiffableFromFile(testFile, maxElementsToRead); List diffs = diffEngine.diff(master, test); diffEngine.reportSummarizedDifferences(diffs, params); return true; diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java index bea9eaec5..6f56fce4b 100644 --- a/public/java/test/org/broadinstitute/sting/MD5DB.java +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -47,6 +47,7 @@ public class MD5DB { /** * Subdirectory under the ant build directory where we store integration test md5 results */ + private static final int MAX_RECORDS_TO_READ = 10000; public static final String LOCAL_MD5_DB_DIR = "integrationtests"; public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests"; @@ -232,7 +233,7 @@ public class MD5DB { // inline differences DiffEngine.SummaryReportParams params = new DiffEngine.SummaryReportParams(System.out, 20, 10, 0); - boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), params); + boolean success = DiffEngine.simpleDiffFiles(new File(pathToExpectedMD5File), new File(pathToFileMD5File), MAX_RECORDS_TO_READ, params); if ( success ) System.out.printf("Note that the above list is not comprehensive. At most 20 lines of output, and 10 specific differences will be listed. Please use -T DiffObjects -R public/testdata/exampleFASTA.fasta -m %s -t %s to explore the differences more freely%n", pathToExpectedMD5File, pathToFileMD5File); From a29554e56573c587a0248d726a89c20bf204fc88 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 25 Jul 2011 15:10:25 -0400 Subject: [PATCH 019/186] Removing the Genomic Annotator and its supporting classes --- .../annotator/AnnotatorInputTableCodec.java | 193 --- .../annotator/AnnotatorInputTableFeature.java | 158 --- .../walkers/annotator/VariantAnnotator.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 106 +- .../genomicannotator/GenomicAnnotation.java | 299 ----- .../genomicannotator/GenomicAnnotator.java | 287 ----- .../annotator/genomicannotator/JoinTable.java | 226 ---- .../genomicannotator/JoinTableParser.java | 131 --- .../TranscriptToGenomicInfo.java | 1032 ----------------- .../genotyper/UnifiedGenotyperEngine.java | 5 +- .../walkers/phasing/AnnotateMNPsWalker.java | 890 -------------- .../genomicannotator => utils}/AminoAcid.java | 2 +- .../AminoAcidTable.java | 2 +- .../GenomicAnnotatorIntegrationTest.java | 83 -- 14 files changed, 45 insertions(+), 3371 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java rename public/java/src/org/broadinstitute/sting/{gatk/walkers/annotator/genomicannotator => utils}/AminoAcid.java (97%) rename public/java/src/org/broadinstitute/sting/{gatk/walkers/annotator/genomicannotator => utils}/AminoAcidTable.java (99%) delete mode 100755 public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java deleted file mode 100755 index 6bba754be..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.features.annotator; - -import org.apache.log4j.Logger; -import org.broad.tribble.Feature; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.readers.LineReader; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.StringTokenizer; - -public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec { - - private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); - - public static final String DELIMITER = "\t"; - - private ArrayList header; - - /** - * The parser to use when resolving genome-wide locations. - */ - private GenomeLocParser genomeLocParser; - - /** - * Set the parser to use when resolving genetic data. - * @param genomeLocParser The supplied parser. - */ - public void setGenomeLocParser(GenomeLocParser genomeLocParser) { - this.genomeLocParser = genomeLocParser; - } - - /** - * Parses the header. - * - * @param reader - * - * @return The # of header lines for this file. - */ - public Object readHeader(LineReader reader) - { - int[] lineCounter = new int[1]; - try { - header = readHeader(reader, lineCounter); - } catch(IOException e) { - throw new IllegalArgumentException("Unable to read from file.", e); - } - return header; - } - - public Class getFeatureType() { - return AnnotatorInputTableFeature.class; - } - - @Override - public Feature decodeLoc(String line) { - StringTokenizer st = new StringTokenizer(line, DELIMITER); - if ( st.countTokens() < 1 ) - throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line); - - GenomeLoc loc; - String chr = st.nextToken(); - if ( chr.indexOf(":") != -1 ) { - loc = genomeLocParser.parseGenomeLoc(chr); - } else { - if ( st.countTokens() < 3 ) - throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line); - loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken())); - } - return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop()); - } - - - /** - * Parses the line into an AnnotatorInputTableFeature object. - * - * @param line - */ - public AnnotatorInputTableFeature decode(String line) { - final ArrayList header = this.header; //optimization - final ArrayList values = Utils.split(line, DELIMITER, header.size()); - - if ( values.size() != header.size()) { - throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size())); - } - - final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header); - for ( int i = 0; i < header.size(); i++ ) { - feature.putColumnValue(header.get(i), values.get(i)); - } - - GenomeLoc loc; - if ( values.get(0).indexOf(":") != -1 ) - loc = genomeLocParser.parseGenomeLoc(values.get(0)); - else - loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2))); - - //parse the location - feature.setChr(loc.getContig()); - feature.setStart((int)loc.getStart()); - feature.setEnd((int)loc.getStop()); - - return feature; - } - - /** - * Returns the header. - * @param source - * @return - * @throws IOException - */ - public static ArrayList readHeader(final File source) throws IOException { - FileInputStream is = new FileInputStream(source); - try { - return readHeader(new AsciiLineReader(is), null); - } finally { - is.close(); - } - } - - - /** - * Returns the header, and also sets the 2nd arg to the number of lines in the header. - * @param source - * @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header. - * @return The header fields. - * @throws IOException - */ - private static ArrayList readHeader(final LineReader source, int[] lineCounter) throws IOException { - - ArrayList header = null; - int numLines = 0; - - //find the 1st line that's non-empty and not a comment - String line = null; - while( (line = source.readLine()) != null ) { - numLines++; - if ( line.trim().isEmpty() || line.startsWith("#") ) { - continue; - } - - //parse the header - header = Utils.split(line, DELIMITER); - break; - } - - // check that we found the header - if ( header == null ) { - throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty."); - } - - if(lineCounter != null) { - lineCounter[0] = numLines; - } - - logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header))); - - return header; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java deleted file mode 100755 index d12badd28..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.refdata.features.annotator; - -import org.broad.tribble.Feature; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -/** - * This class represents a single record in an AnnotatorInputTable. - */ -public class AnnotatorInputTableFeature implements Feature { - - private ArrayList columnNames; - private HashMap columnValues; //maps colum names to column values - - private String chr; - private int start; - private int end; - private String strRep = null; - - /** - * Constructor. - * @param chr The chromosome name. - * @param start The start position - * @param end The end position - */ - public AnnotatorInputTableFeature(String chr, int start, int end) { - this.chr = chr; - this.start = start; - this.end = end; - } - - - /** - * Constructor. - * @param columnNames The column names as parsed out of the file header. - */ - public AnnotatorInputTableFeature(ArrayList columnNames) { - this.columnNames = columnNames; - this.columnValues = new HashMap(); - } - - - - /** - * @return the list of column names from the file header. - */ - public ArrayList getHeader() { - return columnNames; - } - - - /** - * Returns the value of the given column. - * - * @param columnName The column name as it appears in the file header. - * @return The value - */ - public String getColumnValue(final String columnName) { - return columnValues.get(columnName); - } - - - public boolean containsColumnName(final String columnName) { - return columnValues.containsKey(columnName); - } - - - /** - * Sets the value for the given column. - * - * @param columnName The column name as it appears in the file header. - * @param value The value - * @return The existing value associated with the columnName, if there is one. - */ - protected String putColumnValue(final String columnName, final String value) { - return columnValues.put(columnName, value); - } - - /** - * @return all values in this line, hashed by their column names. - */ - public Map getColumnValues() { - return Collections.unmodifiableMap(columnValues); - } - - - public String getChr() { - return chr; - } - - public int getStart() { - return start; - } - - public int getEnd() { - return end; - } - - protected void setChr(String chr) { - this.chr = chr; - } - - protected void setStart(int start) { - this.start = start; - } - - protected void setEnd(int end) { - this.end = end; - } - - @Override - public String toString() { - if ( strRep == null ) { - StringBuilder sb = new StringBuilder(); - - for(String columnName : columnNames ) { - if ( sb.length() == 0 ) - sb.append("["); - else - sb.append(", "); - sb.append(columnName + "=" + columnValues.get(columnName)); - } - sb.append("]"); - - strRep = sb.toString(); - } - - return strRep; - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index acbeee3b2..caaa371a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -219,7 +219,7 @@ public class VariantAnnotator extends RodWalker { if ( stratifiedContexts != null ) { annotatedVCs = new ArrayList(VCs.size()); for ( VariantContext vc : VCs ) - annotatedVCs.addAll(engine.annotateContext(tracker, ref, stratifiedContexts, vc)); + annotatedVCs.add(engine.annotateContext(tracker, ref, stratifiedContexts, vc)); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index fdf498a3d..0d1b21499 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -31,8 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.GenomicAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.JoinTable; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; @@ -45,7 +43,6 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -import java.util.Map.Entry; public class VariantAnnotatorEngine { @@ -58,19 +55,6 @@ public class VariantAnnotatorEngine { private HashMap dbAnnotations = new HashMap(); - // command-line option from GenomicAnnotator. - private Map> requestedColumnsMap; - - // command-line option from GenomicAnnotator. - private boolean oneToMany; - - // command-line option from GenomicAnnotator. - private List joinTables; - - // used by GenomicAnnotator. Maps binding name to number of output VCF records - // annotated with records from the input table with this binding name. Only used for - // printing out stats at the end. - private Map inputTableHitCounter = new HashMap(); private static class VAExpression { public String fullName, bindingName, fieldName; @@ -140,7 +124,7 @@ public class VariantAnnotatorEngine { return descriptions; } - public Collection annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public VariantContext annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); @@ -150,42 +134,18 @@ public class VariantAnnotatorEngine { // annotate expressions where available annotateExpressions(tracker, ref, infoAnnotations); - // process the info field - List> infoAnnotationOutputsList = new LinkedList>(); //each element in infoAnnotationOutputs corresponds to a single line in the output VCF file - infoAnnotationOutputsList.add(new LinkedHashMap(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file has 1 additional line. - infoAnnotationOutputsList.get(0).putAll(infoAnnotations); // put the DB membership info in - // go through all the requested info annotationTypes - for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) - { + for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) { Map annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc); - if ( annotationsFromCurrentType == null ) { - continue; - } - - if(annotationType instanceof GenomicAnnotation) - { - infoAnnotationOutputsList = processGenomicAnnotation( infoAnnotationOutputsList, annotationsFromCurrentType ); - } - else - { - // add the annotations to each output line. - for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - infoAnnotationOutput.putAll(annotationsFromCurrentType); - } - } + if ( annotationsFromCurrentType != null ) + infoAnnotations.putAll(annotationsFromCurrentType); } - // annotate genotypes - Map genotypes = annotateGenotypes(tracker, ref, stratifiedContexts, vc); + // generate a new annotated VC + final VariantContext annotatedVC = VariantContext.modifyAttributes(vc, infoAnnotations); - // create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList - Collection returnValue = new LinkedList(); - for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - returnValue.add( new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) ); - } - - return returnValue; + // annotate genotypes, creating another new VC in the process + return VariantContext.modifyGenotypes(annotatedVC, annotateGenotypes(tracker, ref, stratifiedContexts, vc)); } private void annotateDBs(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc, Map infoAnnotations) { @@ -251,6 +211,9 @@ public class VariantAnnotatorEngine { return genotypes; } + +/* + // Finish processing data from GenomicAnnotation. private List> processGenomicAnnotation( List> infoAnnotationOutputsList, Map annotationsForCurrentLocusFromAllAnnotatorInputTables) { @@ -403,12 +366,14 @@ public class VariantAnnotatorEngine { incrementStatsCounter(bindingName, infoAnnotationOutputsList.size()); } - /** + */ +/** * Records statistics that will be printed when GenomicAnnotator finishes. * * @param bindingName The table from which annotations were gotten * @param numNewRecords The number of new output VCF records created with annotations from this table - */ + *//* + private void incrementStatsCounter( final String bindingName, int numNewRecords) { //record some stats - there were infoAnnotationOutputsList.size() output VCF records annotated with data from the 'bindingName' input table. Integer counter = inputTableHitCounter.get(bindingName); @@ -453,13 +418,15 @@ public class VariantAnnotatorEngine { } - /** + */ +/** * Records statistics for the explodeInfoAnnotationOutputsList(..) calculation. * @param bindingName The table from which annotations were gotten * @param numNewVCFRecordsAnnotatedWithBindingNameData The number of new output VCF records created with annotations from this table * @param infoAnnotationOutputsList output list * @param matchingRecordsSize matching records size - */ + *//* + private void recordStats( final String bindingName, int numNewVCFRecordsAnnotatedWithBindingNameData, final List> infoAnnotationOutputsList, int matchingRecordsSize ) { //update stats for the 'bindingName' table @@ -509,13 +476,14 @@ public class VariantAnnotatorEngine { } - /** + */ +/** * Determines whether to exclude the given column from the annotations. * @param key The fully qualified columnName * @return Whether the -S arg specifies that this column should be included in the annotations. * - * TODO this function can be optimized through memoization - */ + *//* + private boolean isKeyFilteredOutBySelectArg(String key) { for(final String bindingName : requestedColumnsMap.keySet()) { @@ -536,10 +504,8 @@ public class VariantAnnotatorEngine { return false; //the -S arg doesn't have anything with the same binding name as this key, so the user implicitly requested this key } - - - - /** + */ +/** * Determines how the engine will handle the case where multiple records in a ROD file * overlap a particular single locus. If oneToMany is set to true, the output will be * one-to-many, so that each locus in the input VCF file could result in multiple @@ -551,18 +517,21 @@ public class VariantAnnotatorEngine { * See class-level comments for more details. * * @param oneToMany true if we should break out from one to many - */ + *//* + public void setOneToMany(boolean oneToMany) { this.oneToMany = oneToMany; } - /** + */ +/** * Sets the columns that will be used for the info annotation field. * Column names should be of the form bindingName.columnName (eg. dbsnp.avHet). * * @param columns An array of strings where each string is a comma-separated list * of columnNames (eg ["dbsnp.avHet,dbsnp.valid", "file2.col1,file3.col1"] ). - */ + *//* + public void setRequestedColumns(String[] columns) { if(columns == null) { throw new IllegalArgumentException("columns arg is null. Please check the -s command-line arg."); @@ -574,17 +543,20 @@ public class VariantAnnotatorEngine { } - /** + */ +/** * Passes in a pointer to the JoinTables. * * @param joinTables The list of JoinTables. There should be one JoinTable object for each -J arg. - */ + *//* + public void setJoinTables(List joinTables) { this.joinTables = joinTables; } - /** + */ +/** * Parses the columns arg and returns a Map of columns hashed by their binding name. * For example: * The command line: @@ -604,7 +576,8 @@ public class VariantAnnotatorEngine { * @param columnsArg The -s command line arg value. * * @return Map representing a parsed version of this arg - see above. - */ + *//* + private static Map> parseColumnsArg(String[] columnsArg) { Map> result = new HashMap>(); @@ -635,5 +608,6 @@ public class VariantAnnotatorEngine { return Collections.unmodifiableMap(inputTableHitCounter); } +*/ } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java deleted file mode 100644 index 0e8360484..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.*; -import java.util.Map.Entry; - -/** - * This plugin for {@link VariantAnnotatorEngine} serves as the core - * of the {@link GenomicAnnotator}. It finds all records in the -B input files - * that match the given variant's position and, optionally, the variant's reference and alternate alleles. - * - * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -public class GenomicAnnotation extends InfoFieldAnnotation { - - public static final String CHR_COLUMN = "chr"; - public static final String START_COLUMN = "start"; - public static final String END_COLUMN = "end"; - public static final String HAPLOTYPE_REFERENCE_COLUMN = "haplotypeReference"; - public static final String HAPLOTYPE_ALTERNATE_COLUMN = "haplotypeAlternate"; - - public static final String NUM_MATCHES_SPECIAL_INFO_FIELD = "numMatchingRecords"; - - /** Characters that aren't allowed within VCF info field key-value pairs */ - public static final char[] ILLEGAL_INFO_FIELD_VALUES = { ' ', '=', ';' }; - /** Replacement for each character in ILLEGAL_INFO_FIELD_VALUES */ - public static final char[] ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES = { '_', '-', '!' }; - - - private void modifyAnnotationsForIndels(VariantContext vc, String featureName, Map annotationsForRecord) { - String inCodingRegionKey = featureName + ".inCodingRegion"; - String referenceCodonKey = featureName + ".referenceCodon"; - String variantCodonKey = featureName + ".variantCodon"; - String codingCoordStrKey = featureName + ".codingCoordStr"; - String proteinCoordStrKey = featureName + ".proteinCoordStr"; - String haplotypeReferenceKey = featureName + "." + HAPLOTYPE_REFERENCE_COLUMN; - String haplotypeAlternateKey = featureName + "." + HAPLOTYPE_ALTERNATE_COLUMN; - String functionalClassKey = featureName + ".functionalClass"; - String startKey = featureName + "." + START_COLUMN; - String endKey = featureName + "." + END_COLUMN; - String referenceAAKey = featureName + ".referenceAA"; - String variantAAKey = featureName + ".variantAA"; - String changesAAKey = featureName + ".changesAA"; - - annotationsForRecord.put(variantCodonKey, "unknown"); - annotationsForRecord.put(codingCoordStrKey, "unknown"); - annotationsForRecord.put(proteinCoordStrKey, "unknown"); - annotationsForRecord.put(referenceAAKey, "unknown"); - annotationsForRecord.put(variantAAKey, "unknown"); - - String refAllele = vc.getReference().getDisplayString(); - if (refAllele.length() == 0) { refAllele = "-"; } - - String altAllele = vc.getAlternateAllele(0).toString(); - if (altAllele.length() == 0) { altAllele = "-"; } - - annotationsForRecord.put(haplotypeReferenceKey, refAllele); - annotationsForRecord.put(haplotypeAlternateKey, altAllele); - annotationsForRecord.put(startKey, String.format("%d", vc.getStart())); - annotationsForRecord.put(endKey, String.format("%d", vc.getEnd())); - - boolean isCodingRegion = annotationsForRecord.containsKey(inCodingRegionKey) && annotationsForRecord.get(inCodingRegionKey).equalsIgnoreCase("true") ? true : false; - boolean isFrameshift = (vc.getIndelLengths().get(0) % 3 == 0) ? false : true; - - String functionalClass; - if (isCodingRegion) { - functionalClass = isFrameshift ? "frameshift" : "inframe"; - annotationsForRecord.put(changesAAKey, "true"); - } else { - functionalClass = "noncoding"; - } - - annotationsForRecord.put(functionalClassKey, functionalClass); - } - - /** - * For each -B input file, for each record which overlaps the current locus, generates a - * set of annotations of the form: - * - * bindingName.columnName1=columnValue, bindingName.columnName2=columnValue2, etc. - * - * For example: dbSNP.avHet=0.7, dbSNP.ref_allele=A, etc. - * - * @return The following is an explanation of this method's return value: - * - * The annotations from a matching in a particular file are stored in a Map - * where the key is bindingName.columnName and the value is the columnValue. - * Since a single input file can have multiple records that overlap the current - * locus (eg. dbSNP can have multiple entries for the same genomic position), a different - * Map is created for each matching record in a particular file. - * The set of matching records for each file is then represented as a List> - * - * The return value of this method is a Map of the form: - * rodName1 -> List> - * rodName2 -> List> - * rodName3 -> List> - * ... - * Where the rodNames are the -B binding names for each file that were specified on the command line (eg. -B bindingName,AnnotatorInputTable,/path/to/file). - * - * NOTE: The lists (List>) are guaranteed to have size > 0 - * because a rodName -> List> entry will only - * be created in Map if the List has at least one element. - */ - public Map annotate(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final Map stratifiedContexts, - final VariantContext vc) { - - //iterate over each record that overlaps the current locus, and, if it passes certain filters, - //add its values to the list of annotations for this locus. - final Map annotations = new HashMap(); - for(final GATKFeature gatkFeature : tracker.getAllRods()) - { - final String name = gatkFeature.getName(); - if( name.equals("variant") || name.equals("interval") ) { - continue; - } - - if( ! (gatkFeature.getUnderlyingObject() instanceof AnnotatorInputTableFeature) ) { - continue; //GenericAnnotation only works with TabularRODs because it needs to be able to select individual columns. - } - - final Map annotationsForRecord = convertRecordToAnnotations( gatkFeature.getName(), ((AnnotatorInputTableFeature) gatkFeature.getUnderlyingObject()).getColumnValues()); - - //If this record contains the HAPLOTYPE_REFERENCE_COLUMN and/or HAPLOTYPE_ALTERNATE_COLUMN, check whether the - //alleles specified match the the variant's reference allele and alternate allele. - //If they don't match, this record will be skipped, and its values will not be used for annotations. - // - //If one of these columns doesn't exist in the current rod, or if its value is * (star), then this is treated as an automatic match. - //Otherwise, the HAPLOTYPE_REFERENCE_COLUMN is only considered to be matching the variant's reference if the string values of the two - //are exactly equal (case-insensitive). - - //The HAPLOTYPE_REFERENCE_COLUMN matches the variant's reference allele based on a case-insensitive string comparison. - //The HAPLOTYPE_ALTERNATE_COLUMN can optionally list more than allele separated by one of these chars: ,\/:| - // only check this value for SNPs - String hapAltValue = vc.isSNP() ? annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_ALTERNATE_COLUMN) ) : null; - if ( hapAltValue != null && !hapAltValue.equals("*") ) { - Set alternateAlleles = vc.getAlternateAlleles(); - //if(alternateAlleles.isEmpty()) { - //handle a site that has been called monomorphic reference - //alternateAlleles.add(vc.getReference()); - //continue; //TODO If this site is monomorphic in the VC, and the current record specifies a particular alternate allele, skip this record. Right? - //} else - if(alternateAlleles.size() > 1) { - throw new UserException.MalformedFile("File associated with " + vc.getSource() + " contains record [" + vc + "] contains " + alternateAlleles.size() + " alternate alleles. GenomicAnnotion currently only supports annotating 1 alternate allele."); - } - - Allele vcAlt; - if(alternateAlleles.isEmpty()) { - vcAlt = vc.getReference(); - } else { - vcAlt = alternateAlleles.iterator().next(); - } - - boolean matchFound = false; - for(String hapAlt : hapAltValue.split("[,\\\\/:|]")) { - - if(!hapAlt.isEmpty() && vcAlt.basesMatch(hapAlt)) { - matchFound = true; - break; - } - } - if(!matchFound) { - continue; //skip record - none of its alternate alleles match the variant's alternate allele - } - } - - // only check this value for SNPs - String hapRefValue = vc.isSNP() ? annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_REFERENCE_COLUMN) ) : null; - if(hapRefValue != null) - { - hapRefValue = hapRefValue.trim(); - if(!hapRefValue.equals("*")) - { - //match against hapolotypeReference. - Allele vcRef = vc.getReference(); - if(!vcRef.basesMatch(hapRefValue)) { - continue; //skip record - } - } - } - - if (vc.isIndel()) { - modifyAnnotationsForIndels(vc, name, annotationsForRecord); - } - - //filters passed, so add this record. - List> listOfMatchingRecords = (List>) annotations.get( name ); - if(listOfMatchingRecords == null) { - listOfMatchingRecords = new LinkedList>(); - listOfMatchingRecords.add( annotationsForRecord ); - annotations.put(name, listOfMatchingRecords); - } else { - listOfMatchingRecords.add( annotationsForRecord ); - } - } - - return annotations; - } - - - - - /** - * Converts the given record to a set of key-value pairs of the form: - * bindingName.columnName1=column1Value, bindingName.columnName2=column2Value - * (eg. dbSNP.avHet=0.7, dbSNP.ref_allele=A) - * - * @param record AnnotatorInputTableFeature corresponding to one record in one -B input file. - * @param bindingName The binding name of the given AnnotatorInputTableFeature. - * @return The map of columnName -> columnValue pairs. - */ - public static Map convertRecordToAnnotations( String bindingName, Map record) { - final Map result = new HashMap(); - - for(final Entry entry : record.entrySet()) { - final String value = entry.getValue(); - if(!value.trim().isEmpty()) { - result.put( generateInfoFieldKey(bindingName, entry.getKey()), scrubInfoFieldValue(entry.getValue())); - } - } - - return result; - } - - /** - * Combines the 2 values into a full key. - * @param rodBindingName -B name - * @param columnName column name - * @return info field key - */ - public static String generateInfoFieldKey(String rodBindingName, String columnName ) { - return rodBindingName + '.' + columnName; - } - - - - /** - * Replaces any characters that are not allowed in the info field of a VCF file. - * - * @param value info field value - * @return the value with any illegal characters replaced by legal ones. - */ - private static String scrubInfoFieldValue(String value) { - for(int i = 0; i < GenomicAnnotation.ILLEGAL_INFO_FIELD_VALUES.length; i++) { - value = value.replace(GenomicAnnotation.ILLEGAL_INFO_FIELD_VALUES[i], GenomicAnnotation.ILLEGAL_INFO_FIELD_VALUE_SUBSTITUTES[i]); - } - - return value; - } - - - - public List getDescriptions() { - return Arrays.asList(new VCFInfoHeaderLine("GenericAnnotation", 1, VCFHeaderLineType.Integer, "For each variant in the 'variants' ROD, finds all entries in the other -B files that overlap the variant's position.")); - } - - public List getKeyNames() { - return Arrays.asList("GenericAnnotation"); - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java deleted file mode 100644 index b42310780..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.Map.Entry; - -/** - * Annotates variant calls with information from user-specified tabular files. - * - * For details, see: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -@Requires(value={DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type=VariantContext.class)) -@By(DataSource.REFERENCE) -public class GenomicAnnotator extends RodWalker implements TreeReducible { - - @Output(doc="File to which variants should be written",required=true) - protected VCFWriter vcfWriter = null; - - @Argument(fullName="vcfOutput", shortName="vcf", doc="Please use --out instead", required=false) - @Deprecated - protected String oldOutArg; - - @Argument(fullName="sampleName", shortName="sample", doc="The sample (NA-ID) corresponding to the variant input (for non-VCF input only)", required=false) - protected String sampleName = null; - - @Argument(fullName="select", shortName="s", doc="Optionally specifies which subset of columns from which -B inputs should be used for annotations. For example, -B:mydbsnp,AnnotatorInputTable /path/to/mydbsnp.txt -B:mytable,AnnotatorInputTable /path/mytable.txt -s mydbsnp.avHet,mydbsnp.name,mytable.column3 will cause annotations to only be generated from the 3 columns specified using -s.", required=false) - protected String[] SELECT_COLUMNS = {}; - - @Argument(fullName="join", shortName="J", doc="Optionally specifies a file and column within that file that should be LEFT-JOIN'ed to a column in a previously-specified file. The file provided to -J must be tab-delimited, with the first non-comment/non-empty line containing column names. (example: -B:name,AnnotatorInputTable /path/to/file1 -J name2,/path/to/file2,name.columnName=name2.columnName2 - this will join the table in file2 to the table in file1) ", required=false) - protected String[] JOIN_ARGS = {}; - - @Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false) - protected Boolean ONE_TO_MANY = false; - - @Argument(fullName="maxJoinTableSize", shortName="maxJoin", doc="The maximum allowed size (i.e. number of rows) for a table provided with the -J argument", required=false) - protected Integer MAX_JOIN_TABLE_SIZE = 500000; - - @Argument(fullName="ignoreFilteredSites", shortName="noFilt", doc="If specified, don't annotate sites marked as filtered out") - protected Boolean IGNORE_FILTERED_SITES = false; - - private VariantAnnotatorEngine engine; - - /** - * Prepare the output file and the list of available features. - */ - public void initialize() { - - //read all ROD file headers and construct a set of all column names to be used for validation of command-line args - final Set allFullyQualifiedColumnNames = new LinkedHashSet(); - final Set allBindingNames = new LinkedHashSet(); - for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) { - if(! ds.getType().equals(AnnotatorInputTableCodec.class)) { - continue; //skip all non-AnnotatorInputTable files. - } - final String bindingName = ds.getName(); - File file = ds.getFile(); - allBindingNames.add(bindingName); - try { - final ArrayList header = AnnotatorInputTableCodec.readHeader(file); - for(String columnName : header) { - allFullyQualifiedColumnNames.add(bindingName + "." + columnName); - } - } catch(IOException e) { - throw new UserException.CouldNotReadInputFile(file, "Failed when attempting to read file header. ", e); - } - } - - //parse the JOIN_COLUMNS args, read in the specified files, and validate column names in the = relation. This end result of this loop is to populate the List of joinTables with one entry per -J arg. - final List joinTables = new LinkedList(); - for(String joinArg : JOIN_ARGS) { - - //parse the tokens - final String[] arg = joinArg.split(","); - if(arg.length != 3) { - throw new UserException.BadArgumentValue("-J", "The following -J arg: \"" + joinArg + "\" must contain 3 comma-separated values. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - final String bindingName = arg[0]; - final String filename = arg[1]; - final String columnsToJoin = arg[2]; - - if(allBindingNames.contains(bindingName)) { - throw new UserException.BadArgumentValue("-J", "The name \"" + bindingName + "\" in the -J arg: \"" + joinArg + "\" has already been used in another binding."); - } - - String[] splitOnEquals = columnsToJoin.split("=+"); - if(splitOnEquals.length != 2) { - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" must specify the columns to join on. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - - String[] splitOnDot1 = splitOnEquals[0].split("\\."); - String[] splitOnDot2 = splitOnEquals[1].split("\\."); - if(splitOnDot1.length != 2 || splitOnDot2.length != 2) { - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" must fully specify the columns to join on. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - - final String bindingName1 = splitOnDot1[0]; - final String columnName1 = splitOnDot1[1]; - final String bindingName2 = splitOnDot2[0]; - final String columnName2 = splitOnDot2[1]; - - //figure out which of the 2 binding names within the = relation matches the -J bindingName - final String localBindingName = bindingName; //alias - final String localColumnName; - final String externalBindingName; - final String externalColumnName; - if(bindingName1.equals(bindingName)) { - localColumnName = columnName1; - externalBindingName = bindingName2; - externalColumnName = columnName2; - } else if(bindingName2.equals(bindingName)) { - localColumnName = columnName2; - externalBindingName = bindingName1; - externalColumnName = columnName1; - } else { - throw new UserException.BadArgumentValue("-J", "The name \"" + bindingName + "\" in the -J arg: \"" + joinArg + "\" must be specified in one the columns to join on. (ex: -J name,/path/to/file,name.columnName=name2.columnName2)"); - } - - //validate externalColumnName - final String fullyQualifiedExternalColumnName = externalBindingName + '.' + externalColumnName; - if( !allFullyQualifiedColumnNames.contains(fullyQualifiedExternalColumnName) ) { - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" specifies an unknown column name: \"" + fullyQualifiedExternalColumnName + "\""); - } - - //read in the file contents into a JoinTable object - final JoinTable joinTable = new JoinTable(MAX_JOIN_TABLE_SIZE); - joinTable.parseFromFile(filename, localBindingName, localColumnName, externalBindingName, externalColumnName); - joinTables.add(joinTable); - - //validate localColumnName, and add all column names in this file to the list of allFullyQualifiedColumnNames so that they can be referenced from subsequent -J args. - final List columnNames = joinTable.getColumnNames(); - final List fullyQualifiedColumnNames = new LinkedList(); - boolean found = false; - for ( String columnName : columnNames ) { - if ( columnName.equals(localColumnName) ) - found = true; - fullyQualifiedColumnNames.add(localBindingName + '.' + columnName); - } - if ( !found ) - throw new UserException.BadArgumentValue("-J", "The -J arg: \"" + joinArg + "\" specifies an unknown column name: \"" + localColumnName + "\". It's not one of the column names in the header " + columnNames + " of the file: " + filename); - - allFullyQualifiedColumnNames.addAll(fullyQualifiedColumnNames); - } - - //parse the SELECT_COLUMNS arg and validate the column names - List parsedSelectColumns = new LinkedList(); - for ( String token : SELECT_COLUMNS ) - parsedSelectColumns.addAll(Arrays.asList(token.split(","))); - SELECT_COLUMNS = parsedSelectColumns.toArray(SELECT_COLUMNS); - - for ( String columnName : SELECT_COLUMNS ) { - if ( !allFullyQualifiedColumnNames.contains(columnName) ) - throw new UserException.BadArgumentValue("-s", "The column name '" + columnName + "' provided to -s doesn't match any of the column names in any of the -B files. Here is the list of available column names: " + allFullyQualifiedColumnNames); - } - - //instantiate the VariantAnnotatorEngine - ArrayList annotationsToUse = new ArrayList(); - annotationsToUse.add("GenomicAnnotation"); - engine = new VariantAnnotatorEngine(getToolkit(), new ArrayList(), annotationsToUse); - engine.setOneToMany(ONE_TO_MANY); - engine.setRequestedColumns(SELECT_COLUMNS); - engine.setJoinTables(joinTables); - - // set up the header fields - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant"))); - hInfo.addAll(engine.getVCFAnnotationDescriptions()); - - Set rodName = new HashSet(); - rodName.add("variant"); - Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); - VCFHeader vcfHeader = new VCFHeader(hInfo, samples); - vcfWriter.writeHeader(vcfHeader); - } - - /** - * Initialize the number of loci processed to zero. - * - * @return 0 - */ - public Integer reduceInit() { return 0; } - - /** - * We want reads that span deletions - * - * @return true - */ - public boolean includeReadsWithDeletionAtLoci() { return true; } - - /** - * For each site of interest, annotate based on the requested annotation types - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return 1 if the locus was successfully processed, 0 if otherwise - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - Set results = new LinkedHashSet(); - for (VariantContext vc : tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false)) { - if ( (vc.isFiltered() && IGNORE_FILTERED_SITES) || - (vc.isVariant() && !vc.isBiallelic()) ) { - results.add(vc); - } else { - Map stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(context); - if ( stratifiedContexts != null ) - results.addAll(engine.annotateContext(tracker, ref, stratifiedContexts, vc)); - else - results.add(vc); - } - } - - for ( VariantContext vc : results ) - vcfWriter.add(vc ,ref.getBase()); - - return 1; - } - - public Integer reduce(Integer value, Integer sum) { - return sum + value; - } - - public Integer treeReduce(Integer lhs, Integer rhs) { - return lhs + rhs; - } - - public void onTraversalDone(Integer sum) { - - //out.printf("Generated %d annotated VCF records.\n", totalOutputVCFRecords); - Map inputTableHitCounter = engine.getInputTableHitCounter(); - for ( Entry e : inputTableHitCounter.entrySet() ) { - final String bindingName = e.getKey(); - final int counter = e.getValue(); - //final float percent = 100 * counter /(float) totalOutputVCFRecords; - //out.printf(" %-6.1f%% (%d) annotated with %s.\n", percent, counter, bindingName ); - System.out.printf(" %d annotated with %s.\n", counter, bindingName ); - } - } -} - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java deleted file mode 100755 index 714f374cf..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTable.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -/** - * This is a container that holds all data corresponding to a single join table as specified by one -J arg (ex: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2). - * Some terminology: - * 'bindingName' is an arbitrary label for a given table that is specified on the command line with either the -B or -J arg. - * In the example above, bindingName1 is the 'local' binding name because it is attached to the join table file provided with this -J arg. bindingName2 is the 'external' binding name because - * it corresponds to some other table specified previously with another -B or -J arg. - * - * The JoinTable object stores a map entry for each record in the join table. The entry's key is the value of the join column in a given record (eg. bindingName1.columnName in the above example), - * and the entry value is an ArrayList representing the entire join table record. - * The JoinTable object also stores some other join table parameters such as the column names that were parsed out of the file header, and the bindingNames and columnNames from the -J arg. - * - * The join operation is performed by looking up the value of the join column in the external table (the one that this table is being joined to), and then using this value to do a lookup - * on the map - if there's a hit, it will provide the record from the join table that is to be joined with the record in the external table. - * - * More information can be found here: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -public class JoinTable -{ - //the list of join table column names parsed out of the file header. - private List columnNames; //not fully-qualified - - private String localBindingName; - private String externalBindingName; - private String externalColumnName; - - //stores a map entry for each record in the join table. The entry's key is the value of the join column in a given record (eg. bindingName.columnName in the above example), - //and the entry value is an ArrayList representing the entire join table record. - private HashMap> joinColumnValueToRecords = new HashMap>(); - - private int maxSize; - private boolean parsedFromFile = false; - - public JoinTable(int maxSize) { - this.maxSize = maxSize; - } - - /** - * Parses the table from the given file using the JoinTableParser. - * - * @param filename The file containing the table. - * @param localBindingName The binding name within the given file to join on. - * @param localColumnName The column name within the given file to join on. - * @param externalBindingName The binding name of another file (previously specified with either -B or -J). - * @param externalColumnName The column name in this other file to join on. - */ - public void parseFromFile(String filename, String localBindingName, String localColumnName, String externalBindingName, String externalColumnName) { - if(parsedFromFile) { - throw new ReviewedStingException("parseFromFile(" + filename +", ..) called more than once"); - } - parsedFromFile = true; - - setLocalBindingName(localBindingName); - setExternalBindingName(externalBindingName); - setExternalColumnName(externalColumnName); - - BufferedReader br = null; - try - { - br = new BufferedReader(new FileReader(filename)); - final JoinTableParser parser = new JoinTableParser(); - - //read in the header - columnNames = parser.readHeader(br); - - //get the index of the localJoinColumnName - int localColumnNameIdx = -1; - for(int i = 0; i < columnNames.size(); i++) { - final String columnName = columnNames.get(i); - if(columnName.equals(localColumnName)) { - localColumnNameIdx = i; - break; - } - } - - if(localColumnNameIdx == -1) { - throw new UserException.BadArgumentValue("-J", "The -J arg specifies an unknown column name: \"" + localColumnName + "\". It's not one of the column names in the header " + columnNames + " of the file: " + filename); - } - - //read in all records and create a map entry for each - String line; - while((line = br.readLine()) != null) { - final ArrayList columnValues = parser.parseLine(line); - if ( columnValues.size() < columnNames.size() ) - throw new UserException.BadInput("the file: " + filename + " is malformed as there are not a sufficient number of columns for this line: " + line); - final String joinColumnValue = columnValues.get(localColumnNameIdx); - put(joinColumnValue, columnValues, filename); - } - } - catch(IOException e) - { - throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to parse file", e); - } - finally - { - try { - if(br != null) { - br.close(); - } - } catch(IOException e) { - throw new ReviewedStingException("Unable to close file: " + filename, e); - } - } - } - - /** - * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, - * this returns bindingName1. - * @return local binding name - */ - public String getLocalBindingName() { - return localBindingName; - } - - public void setLocalBindingName(String localBindingName) { - this.localBindingName = localBindingName; - } - - /** - * @return the list of join table column names parsed out of the file header. - */ - public List getColumnNames() { - return columnNames; //not fully-qualified - } - - protected void setColumnNames(List columnNames) { - this.columnNames = columnNames; - } - - /** - * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, - * this returns columnName2. - * @return external column name - */ - public String getExternalColumnName() { - return externalColumnName; - } - - protected void setExternalColumnName( - String externalColumnName) { - this.externalColumnName = externalColumnName; - } - - /** - * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, - * this returns bindingName2. - * @return external binding name - */ - public String getExternalBindingName() { - return externalBindingName; - } - - protected void setExternalBindingName( - String externalBindingName) { - this.externalBindingName = externalBindingName; - } - - /** - * Whether any join table records have the given value in the join column. - * @param joinColumnValue value - * @return true if the given name value exists in the file - */ - public boolean containsJoinColumnValue(String joinColumnValue) { - return joinColumnValueToRecords.containsKey(joinColumnValue); - } - - /** - * Returns all records in the table where the join column has the given value. - * @param joinColumnValue column value - * @return row - */ - public ArrayList get(String joinColumnValue) { - return joinColumnValueToRecords.get(joinColumnValue); - } - - /** - * Adds the given record to the map. - * @param joinColumnValue value - * @param record row - * @param filename the source file name - */ - protected void put(String joinColumnValue, ArrayList record, String filename) { - if ( joinColumnValueToRecords.containsKey(joinColumnValue) ) - throw new UserException.BadInput("the file " + filename + " contains non-unique entries for the requested column, which isn't allowed."); - joinColumnValueToRecords.put(joinColumnValue, record); - if ( joinColumnValueToRecords.size() > maxSize ) - throw new UserException.BadInput("the file " + filename + " contains more than the maximum number (" + maxSize + ") of allowed rows (see the --maxJoinTableSize argument)."); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java deleted file mode 100755 index 3b6c87f90..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/JoinTableParser.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Used to parse files passed to the GenomicAnnotator via the -J arg. - * The files must be tab-delimited, and the first non-empty/non-commented line - * must be a header containing column names. - * - * More information can be found here: http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator - */ -public class JoinTableParser -{ - public static final String DELIMITER = "\t"; - - private List header; //column names parsed out of the header line - - - /** - * Constructor. - */ - public JoinTableParser() {} - - /** - * Returns the header and returns it. - * @param br source - * @return column names - * @throws IOException on read - */ - public List readHeader(BufferedReader br) throws IOException - { - if(header != null) { - throw new ReviewedStingException("readHeader(..) called more than once. Header is currently set to: " + header); - } - - header = Collections.unmodifiableList(parseHeader(br)); - - return header; - } - - - /** - * @return A list containing the column names. - */ - public List getHeader() { - return header; - } - - - /** - * Parses the line into an ArrayList containing the values for each column. - * - * @param line to parse - * @return tokens - */ - public ArrayList parseLine(String line) { - - final ArrayList values = Utils.split(line, DELIMITER, header.size()); - - if ( values.size() != header.size() ) { - throw new UserException.MalformedFile(String.format("Encountered a row with %d columns which is different from the number or columns in the header: %d\nHeader: " + header + "\nLine: " + values, values.size(), header.size())); - } - - return values; - } - - - /** - * Returns the header. - * @param br The file to read. - * @return ArrayList containing column names from the header. - * @throws IOException on reading - */ - public static ArrayList parseHeader(final BufferedReader br) throws IOException - { - ArrayList header = null; - - //find the 1st line that's non-empty and not a comment - String line; - while( (line = br.readLine()) != null ) { - line = line.trim(); - if ( line.isEmpty() || line.startsWith("#") ) { - continue; - } - - //parse the header - header = Utils.split(line, DELIMITER); - break; - } - - // check that header was found - if ( header == null ) { - throw new IllegalArgumentException("No header in " + br + ". All lines are either comments or empty."); - } - - return header; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java deleted file mode 100755 index 0bbfa51b4..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ /dev/null @@ -1,1032 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.IOException; -import java.io.PrintStream; -import java.util.*; - -/** - * Takes a table of transcripts (eg. UCSC refGene, knownGene, and CCDS tables) and generates the big table which contains - * annotations for each possible variant at each transcript position (eg. 4 variants at each genomic position). - * - * Required args: - * -B - specifies the input file (ex. -B transcripts,AnnotatorInputTable,/path/to/transcript_table_file.txt) - * -n - Specifies which column(s) from the transcript table contain the gene name(s). (ex. -n name,name2 (for the UCSC refGene table)) - * WARNING: The gene names for each record, when taken together, should provide a unique id for that record relative to all other records in the file. - * - * - * The map & reduce types are both TreeMap. - * Each TreeMap entry represents one line in the output file. The TreeMap key is a combination of a given output line's position (so that this key can be used to sort all output lines - * by reference order), as well as allele and gene names (so that its unique across all output lines). The String value is the output line itself. - */ -@Reference(window=@Window(start=-4,stop=4)) -@By(DataSource.REFERENCE) -@Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name=TranscriptToGenomicInfo.ROD_NAME,type=AnnotatorInputTableFeature.class) } ) -public class TranscriptToGenomicInfo extends RodWalker { - public static final String ROD_NAME = "transcripts"; - - //@Argument(fullName="pass-through", shortName="t", doc="Optionally specifies which columns from the transcript table should be copied verbatim (aka. passed-through) to the records in the output table. For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -t id will cause the refGene id column to be copied to the output table.", required=false) - //protected String[] PASS_THROUGH_COLUMNS = {}; - - @Output - private PrintStream out; - - @Argument(fullName="unique-gene-name-columns", shortName="n", doc="Specifies which column(s) from the transcript table contains the gene name(s). For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -n name,name2 specifies that the name and name2 columns are gene names. WARNING: the gene names for each record, when taken together, should provide a unique id for that record relative to all other records in the file. If this is not the case, an error will be thrown. ", required=true) - private String[] GENE_NAME_COLUMNS = {}; - - private final char[] ALLELES = {'A','C','G','T'}; - - /** Output columns */ - private static final String[] GENOMIC_ANNOTATION_COLUMNS = { - GenomicAnnotation.CHR_COLUMN, - GenomicAnnotation.START_COLUMN, - GenomicAnnotation.END_COLUMN, - GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, - GenomicAnnotation.HAPLOTYPE_ALTERNATE_COLUMN}; - - private static final String OUTPUT_TRANSCRIPT_STRAND = "transcriptStrand"; //rg. +/- - private static final String OUTPUT_IN_CODING_REGION = "inCodingRegion"; //eg. true - private static final String OUTPUT_FRAME = "frame"; //eg. 0,1,2 - private static final String OUTPUT_POSITION_TYPE = "positionType"; //eg. utr5, cds, utr3, intron, intergenic - private static final String OUTPUT_MRNA_COORD = "mrnaCoord"; //1-based offset within the transcript - private static final String OUTPUT_SPLICE_DISTANCE = "spliceDist"; //eg. integer, bp to nearest exon/intron boundary - private static final String OUTPUT_CODON_NUMBER = "codonCoord"; //eg. 20 - private static final String OUTPUT_REFERENCE_CODON = "referenceCodon"; - private static final String OUTPUT_REFERENCE_AA = "referenceAA"; - private static final String OUTPUT_VARIANT_CODON = "variantCodon"; - private static final String OUTPUT_VARIANT_AA = "variantAA"; - private static final String OUTPUT_CHANGES_AMINO_ACID = "changesAA"; //eg. true - private static final String OUTPUT_FUNCTIONAL_CLASS = "functionalClass"; //eg. missense - private static final String OUTPUT_CODING_COORD_STR = "codingCoordStr"; - private static final String OUTPUT_PROTEIN_COORD_STR = "proteinCoordStr"; - private static final String OUTPUT_SPLICE_INFO = "spliceInfo"; //(eg "splice-donor -4", or "splice-acceptor 3") for the 10bp surrounding each exon/intron boundary - private static final String OUTPUT_UORF_CHANGE = "uorfChange"; // (eg +1 or -1, indicating the addition or interruption of an ATG trinucleotide in the annotated utr5) - private static final String[] TRANSCRIPT_COLUMNS = { - OUTPUT_TRANSCRIPT_STRAND, - OUTPUT_POSITION_TYPE, - OUTPUT_FRAME, - OUTPUT_MRNA_COORD, - OUTPUT_CODON_NUMBER, - OUTPUT_SPLICE_DISTANCE, - OUTPUT_REFERENCE_CODON, - OUTPUT_REFERENCE_AA, - OUTPUT_VARIANT_CODON, - OUTPUT_VARIANT_AA, - OUTPUT_CHANGES_AMINO_ACID, - OUTPUT_FUNCTIONAL_CLASS, - OUTPUT_CODING_COORD_STR, - OUTPUT_PROTEIN_COORD_STR, - OUTPUT_IN_CODING_REGION, - OUTPUT_SPLICE_INFO, - OUTPUT_UORF_CHANGE }; - - //This list specifies the order of output columns in the big table. - private final List outputColumnNames = new LinkedList(); - - private int transcriptsProcessedCounter = 0; - - private long transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter = 0; - private long transcriptsThatDontStartWithMethionineCounter = 0; - private long transcriptsThatDontEndWithStopCodonCounter = 0; - private long skippedTranscriptCounter = 0; - - private long skippedPositionsCounter = 0; - private long totalPositionsCounter = 0; - - /** Possible values for the "POSITION_TYPE" output column. */ - private enum PositionType { - intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron - } - - /** - * Store rods until we hit their ends so that we don't have to recompute - * basic information every time we see them in map(). - */ - private Map storedTranscriptInfo = new HashMap(); - - /** - * Prepare the output file and the list of available features. - */ - public void initialize() { - - //parse the GENE_NAME_COLUMNS arg and validate the column names - final List parsedGeneNameColumns = new LinkedList(); - for(String token : GENE_NAME_COLUMNS) { - parsedGeneNameColumns.addAll(Arrays.asList(token.split(","))); - } - GENE_NAME_COLUMNS = parsedGeneNameColumns.toArray(GENE_NAME_COLUMNS); - - ReferenceOrderedDataSource transcriptsDataSource = null; - for(ReferenceOrderedDataSource ds : getToolkit().getRodDataSources()) { - if(ds.getName().equals(ROD_NAME)) { - transcriptsDataSource = ds; - break; - } - } - - // sanity check - if ( transcriptsDataSource == null ) - throw new IllegalStateException("No rod bound to " + ROD_NAME + " found in rod sources"); - - final ArrayList header; - try { - header = AnnotatorInputTableCodec.readHeader(transcriptsDataSource.getFile()); - } catch(Exception e) { - throw new UserException.MalformedFile(transcriptsDataSource.getFile(), "Failed when attempting to read header from file", e); - } - - for ( String columnName : GENE_NAME_COLUMNS ) { - if ( !header.contains(columnName) ) - throw new UserException.CommandLineException("The column name '" + columnName + "' provided to -n doesn't match any of the column names in: " + transcriptsDataSource.getFile()); - } - - //init outputColumnNames list - outputColumnNames.addAll(Arrays.asList(GENOMIC_ANNOTATION_COLUMNS)); - outputColumnNames.addAll(Arrays.asList(GENE_NAME_COLUMNS)); - outputColumnNames.addAll(Arrays.asList(TRANSCRIPT_COLUMNS)); - - //init OUTPUT_HEADER_LINE - StringBuilder outputHeaderLine = new StringBuilder(); - for( final String column : outputColumnNames ) { - if(outputHeaderLine.length() != 0) { - outputHeaderLine.append( AnnotatorInputTableCodec.DELIMITER ); - } - outputHeaderLine.append(column); - } - - out.println(outputHeaderLine.toString()); - } - - public Integer reduceInit() { return 0; } - - /** - * For each site of interest, generate the appropriate fields. - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return 1 if the locus was successfully processed, 0 if otherwise - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - final Collection rods = tracker.getBoundRodTracks(); - //if there's nothing overlapping this locus, skip it. - if ( rods.size() == 0 ) - return 0; - - final List transcriptRODs = tracker.getReferenceMetaData(ROD_NAME); - - //there may be multiple transcriptRODs that overlap this locus - for ( Object transcriptRodObject : transcriptRODs ) { - //parse this ROD if it hasn't been already. - final AnnotatorInputTableFeature transcriptRod = (AnnotatorInputTableFeature) transcriptRodObject; - String featureKey = transcriptRod.toString(); - - TranscriptTableRecord parsedTranscriptRod = storedTranscriptInfo.get(featureKey); - if ( parsedTranscriptRod == null ) { - parsedTranscriptRod = new TranscriptTableRecord(transcriptRod, GENE_NAME_COLUMNS); - storedTranscriptInfo.put(featureKey, parsedTranscriptRod); - } - - //populate parsedTranscriptRod.txSequence - if(parsedTranscriptRod.positiveStrand) { - parsedTranscriptRod.txSequence.append((char)ref.getBase()); - } else { - final char complementBase = (char)BaseUtils.simpleComplement(ref.getBase()); - parsedTranscriptRod.txSequence.insert(0, complementBase); - } - - //populate parsedTranscriptRod.utr5Sequence and parsedTranscriptRod.cdsSequence - final int position = (int) ref.getLocus().getStart(); - if(parsedTranscriptRod.isProteinCodingTranscript() && parsedTranscriptRod.isWithinExon(position) ) - { - //we're within an exon of a proteinCodingTranscript - - if(parsedTranscriptRod.positiveStrand) - { - if(position < parsedTranscriptRod.cdsStart) - { - parsedTranscriptRod.utr5Sequence.append((char)ref.getBase()); //within utr5 - } - else if(position >= parsedTranscriptRod.cdsStart && position <= parsedTranscriptRod.cdsEnd) - { - parsedTranscriptRod.cdsSequence.append((char)ref.getBase()); //within CDS - } - } - else - { - final char complementBase = (char)BaseUtils.simpleComplement(ref.getBase()); - if(position > parsedTranscriptRod.cdsEnd) - { - //As we move left to right (aka. 3' to 5'), we do insert(0,..) to reverse the sequence so that it become 5' to 3' in parsedTranscriptRod.utr5Sequence. - parsedTranscriptRod.utr5Sequence.insert(0,complementBase); //within utr5. - } - else if(position >= parsedTranscriptRod.cdsStart && position <= parsedTranscriptRod.cdsEnd) - { - parsedTranscriptRod.cdsSequence.insert(0,complementBase); //within CDS - } - } - } - - if ( position == parsedTranscriptRod.txEnd ) { - //we've reached the end of the transcript - compute all data and write it out. - try { - generateOutputRecordsForROD(parsedTranscriptRod); - } - catch(IOException e) { - throw new RuntimeException(Thread.currentThread().getName() + " - Unexpected error occurred at position: [" + parsedTranscriptRod.txChrom + ":" + position + "] in transcript: " + parsedTranscriptRod, e); - } - - // remove it from the cache - storedTranscriptInfo.remove(featureKey); - - transcriptsProcessedCounter++; - if ( transcriptsProcessedCounter % 100 == 0 ) - logger.info(new Date() + ": " + transcriptsProcessedCounter + " transcripts processed"); - } - } - - return 1; - } - - private static boolean isChrM(final TranscriptTableRecord record) { - return record.txChrom.equals("chrM") || record.txChrom.equals("MT")|| record.txChrom.equals("CRS"); - } - - private void generateOutputRecordsForROD(TranscriptTableRecord parsedTranscriptRod) throws IOException { - //Transcripts that don't produce proteins are indicated in transcript by cdsStart == cdsEnd - //These will be handled by generating only one record, with haplotypeAlternate == "*". - final boolean isProteinCodingTranscript = parsedTranscriptRod.isProteinCodingTranscript(); - final boolean isMitochondrialTranscript = isChrM(parsedTranscriptRod); - - final boolean positiveStrand = parsedTranscriptRod.positiveStrand; //alias - - - if(isProteinCodingTranscript && parsedTranscriptRod.cdsSequence.length() % 3 != 0) { - if (!isMitochondrialTranscript) { - logger.error("ERROR: Transcript " + parsedTranscriptRod +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] has " + parsedTranscriptRod.cdsSequence.length() + " nucleotides in its CDS region, which is not divisible by 3. Skipping..."); - //discard transcripts where CDS length is not a multiple of 3 - skippedTranscriptCounter++; - return; - } else { - - //In mitochondrial genes, the polyA tail may complete the stop codon, allowing transcript . To check for this special case: - //1. check that the CDS covers the entire transcript - //2. add 1 or 2 A's to the 3' end of the transcript (as needed to make it divisible by 3) - //3. check whether the last 3 letters now form a stop codon using the mitochondrial AA table - //4. If not, skip this gene, else incorporate the A's and process it like any other gene. - - if( parsedTranscriptRod.txSequence.length() == parsedTranscriptRod.cdsSequence.length()) { - do { //append A's until sequence length is divisible by 3 - parsedTranscriptRod.txSequence.append('*'); - parsedTranscriptRod.cdsSequence.append('a'); - if(positiveStrand) { - parsedTranscriptRod.txEnd++; - parsedTranscriptRod.cdsEnd++; - parsedTranscriptRod.exonEnds[0]++; - } else { - parsedTranscriptRod.txStart--; - parsedTranscriptRod.cdsStart--; - parsedTranscriptRod.exonStarts[0]--; - } - } while( parsedTranscriptRod.cdsSequence.length() % 3 != 0); - - } else { - logger.error("ERROR: Mitochnodrial transcript " + parsedTranscriptRod +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] has " + parsedTranscriptRod.cdsSequence.length() + " nucleotides in its CDS region, which is not divisible by 3. The CDS does not cover the entire transcript, so its not possible to use A's from the polyA tail. Skipping..."); - skippedTranscriptCounter++; - return; - } - } - } - - - //warn if the first codon isn't Methionine and/or the last codon isn't a stop codon. - if(isProteinCodingTranscript) { - final int cdsSequenceLength = parsedTranscriptRod.cdsSequence.length(); - - final String firstCodon = parsedTranscriptRod.cdsSequence.substring(0, 3); - final AminoAcid firstAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( firstCodon, true ) : AminoAcidTable.getEukaryoticAA( firstCodon ) ; - - final String lastCodon = parsedTranscriptRod.cdsSequence.substring(cdsSequenceLength - 3, cdsSequenceLength); - final AminoAcid lastAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA( lastCodon, false ) : AminoAcidTable.getEukaryoticAA( lastCodon ) ; - - if( firstAA != AminoAcidTable.METHIONINE && !lastAA.isStop()) { - transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter++; - logger.warn("WARNING: The CDS of transcript " + parsedTranscriptRod.geneNames[0] +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] does not start with Methionine or end in a stop codon. The first codon is: " + firstCodon + " (" + firstAA + "). The last codon is: " + lastCodon + " (" + lastAA + "). NOTE: This is just a warning - the transcript will be included in the output."); - } else if( firstAA != AminoAcidTable.METHIONINE) { - transcriptsThatDontStartWithMethionineCounter++; - logger.warn("WARNING: The CDS of transcript " + parsedTranscriptRod.geneNames[0] +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] does not start with Methionine. The first codon is: " + firstCodon + " (" + firstAA + "). NOTE: This is just a warning - the transcript will be included in the output."); - } else if(!lastAA.isStop()) { - transcriptsThatDontEndWithStopCodonCounter++; - logger.warn("WARNING: The CDS of transcript " + parsedTranscriptRod.geneNames[0] +" at position ["+ parsedTranscriptRod.txChrom + ":" +parsedTranscriptRod.txStart + "-" + parsedTranscriptRod.txEnd + "] does not end in a stop codon. The last codon is: " + lastCodon + " (" + lastAA + "). NOTE: This is just a warning - the transcript will be included in the output."); - } - } - - final int txStart_5prime = positiveStrand ? parsedTranscriptRod.txStart : parsedTranscriptRod.txEnd; //1-based, inclusive - final int txEnd_3prime = positiveStrand ? parsedTranscriptRod.txEnd : parsedTranscriptRod.txStart; //1-based, inclusive - final int increment_5to3 = positiveStrand ? 1 : -1; //whether to increment or decrement - final int strandSign = increment_5to3; //alias - - final int cdsStart_5prime = positiveStrand ? parsedTranscriptRod.cdsStart : parsedTranscriptRod.cdsEnd; //1-based, inclusive - final int cdsEnd_3prime = positiveStrand ? parsedTranscriptRod.cdsEnd : parsedTranscriptRod.cdsStart ; //1-based, inclusive - - int frame = 0; //the frame of the current position - int txOffset_from5 = 1; //goes from txStart 5' to txEnd 3' for both + and - strand - int utr5Count_from5 = 0; - int mrnaCoord_from5 = 1; //goes from txStart 5' to txEnd 3' for both + and - strand, but only counts bases within exons. - char[] utr5NucBuffer_5to3 = null; //used to find uORFs - size = 5 because to hold the 3 codons that overlap any given position: [-2,-1,0], [-1,0,1], and [0,1,2] - - int codonCount_from5 = 1; //goes from cdsStart 5' to cdsEnd 3' for both + and - strand - counts the number of codons - 1-based - int codingCoord_from5 = isProteinCodingTranscript ? parsedTranscriptRod.computeInitialCodingCoord() : -1; //goes from cdsStart 5' to cdsEnd 3' for both + and - strand - boolean codingCoordResetForCDS = false; - boolean codingCoordResetForUtr3 = false; - final char[] currentCodon_5to3 = isProteinCodingTranscript ? new char[3] : null; //holds the current RNA codon - 5' to 3' - - PositionType positionType = null; - boolean isWithinIntronAndFarFromSpliceJunction = false; - int intronStart_5prime = -1; - int intronEnd_5prime; - - final Map outputLineFields = new HashMap(); - - for(int txCoord_5to3 = txStart_5prime; txCoord_5to3 != txEnd_3prime + increment_5to3; txCoord_5to3 += increment_5to3) - { - ++totalPositionsCounter; - - //compute certain attributes of the current position - final boolean isWithinExon = parsedTranscriptRod.isWithinExon(txCoord_5to3); //TODO if necessary, this can be sped up by keeping track of current exon/intron - - final int distanceToNearestSpliceSite = parsedTranscriptRod.computeDistanceToNearestSpliceSite(txCoord_5to3); - final boolean isWithin10bpOfSpliceJunction = Math.abs(distanceToNearestSpliceSite) <= 10; - - - //increment coding coord is necessary - if(isWithinExon) { - codingCoord_from5++; - } - - //figure out the current positionType - final PositionType prevPositionType = positionType; //save the position before it is updated - if(isProteinCodingTranscript) - { - if(isWithinExon) - { - if( strandSign*(txCoord_5to3 - cdsStart_5prime) < 0 ) { //utr5 (multiplying by strandSign is like doing absolute value.) - positionType = PositionType.utr5; - } else if( strandSign*(txCoord_5to3 - cdsEnd_3prime) > 0 ) { //utr3 (multiplying by strandSign is like doing absolute value.) - positionType = PositionType.utr3; - } else { - positionType = PositionType.CDS; - } - } else { - positionType = PositionType.intron; - } - } else { - if(isWithinExon) { - positionType = PositionType.non_coding_exon; - } else { - positionType = PositionType.non_coding_intron; - } - } - - //handle transitions - if(positionType == PositionType.CDS && prevPositionType != PositionType.CDS && !codingCoordResetForCDS) { - //transitioning from utr5 to CDS, reset the coding coord from -1 to 1. - codingCoord_from5 = 1; - codingCoordResetForCDS = true; - } else if(positionType == PositionType.utr3 && prevPositionType != PositionType.utr3 && !codingCoordResetForUtr3) { - //transitioning from CDS to utr3, reset the coding coord to 1. - codingCoord_from5 = 1; - codingCoordResetForUtr3 = true; - } - - - try - { - //handle introns - boolean wasWithinIntronAndFarFromSpliceJunction = isWithinIntronAndFarFromSpliceJunction; - isWithinIntronAndFarFromSpliceJunction = !isWithinExon && !isWithin10bpOfSpliceJunction; - - if(!wasWithinIntronAndFarFromSpliceJunction && isWithinIntronAndFarFromSpliceJunction) { - //save intron start - intronStart_5prime = txCoord_5to3; - - } else if(wasWithinIntronAndFarFromSpliceJunction && !isWithinIntronAndFarFromSpliceJunction) { - //output intron record - intronEnd_5prime = txCoord_5to3 - increment_5to3; - - final int intronStart = (intronStart_5prime < intronEnd_5prime ? intronStart_5prime : intronEnd_5prime) ; - final int intronEnd = (intronEnd_5prime > intronStart_5prime ? intronEnd_5prime : intronStart_5prime); - outputLineFields.clear(); - outputLineFields.put(GenomicAnnotation.CHR_COLUMN, parsedTranscriptRod.txChrom); - outputLineFields.put(GenomicAnnotation.START_COLUMN, String.valueOf(intronStart)); - outputLineFields.put(GenomicAnnotation.END_COLUMN, String.valueOf(intronEnd)); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, Character.toString( '*' ) ); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, Character.toString( '*' ) ); - for(int i = 0; i < GENE_NAME_COLUMNS.length; i++) { - outputLineFields.put(GENE_NAME_COLUMNS[i], parsedTranscriptRod.geneNames[i] ); - } - - outputLineFields.put(OUTPUT_POSITION_TYPE, positionType.toString() ); - outputLineFields.put(OUTPUT_TRANSCRIPT_STRAND, positiveStrand ? "+" : "-" ); - - if ( isProteinCodingTranscript ) - outputLineFields.put(OUTPUT_IN_CODING_REGION, Boolean.toString(positionType == PositionType.CDS) ); - - addThisLineToResult(outputLineFields); - } - - //when in utr5, compute the utr5NucBuffer_5to3 which is later used to compute the OUTPUT_UORF_CHANGE field - if(positionType == PositionType.utr5) - { - if(utr5Count_from5 < parsedTranscriptRod.utr5Sequence.length()) - { - if(utr5NucBuffer_5to3 == null) { - //initialize - utr5NucBuffer_5to3 = new char[5]; - utr5NucBuffer_5to3[3] = parsedTranscriptRod.utr5Sequence.charAt( utr5Count_from5 ); - - if(utr5Count_from5 + 1 < parsedTranscriptRod.utr5Sequence.length() ) { - utr5NucBuffer_5to3[4] = parsedTranscriptRod.utr5Sequence.charAt( utr5Count_from5 + 1 ); - } - } - - //as we move 5' to 3', shift nucleotides down to the 5' end, making room for the new 3' nucleotide: - utr5NucBuffer_5to3[0] = utr5NucBuffer_5to3[1]; - utr5NucBuffer_5to3[1] = utr5NucBuffer_5to3[2]; - utr5NucBuffer_5to3[2] = utr5NucBuffer_5to3[3]; - utr5NucBuffer_5to3[3] = utr5NucBuffer_5to3[4]; - - char nextRefBase = 0; - if( utr5Count_from5 + 2 < parsedTranscriptRod.utr5Sequence.length() ) - { - nextRefBase = parsedTranscriptRod.utr5Sequence.charAt( utr5Count_from5 + 2 ); - } - utr5NucBuffer_5to3[4] = nextRefBase; - - //check for bad bases - if( (utr5NucBuffer_5to3[0] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[0])) || - (utr5NucBuffer_5to3[1] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[1])) || - (utr5NucBuffer_5to3[2] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[2])) || - (utr5NucBuffer_5to3[3] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[3])) || - (utr5NucBuffer_5to3[4] != 0 && !BaseUtils.isRegularBase(utr5NucBuffer_5to3[4]))) - { - logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". utr5NucBuffer_5to3 contains irregular base:" + utr5NucBuffer_5to3[0] + utr5NucBuffer_5to3[1] + utr5NucBuffer_5to3[2] + utr5NucBuffer_5to3[3] + utr5NucBuffer_5to3[4]);// +". Transcript is: " + parsedTranscriptRod); - ++skippedPositionsCounter; - continue; - } - - } else { // if(utr5Count_from5 >= parsedTranscriptRod.utr5Sequence.length()) - //defensive programming - throw new RuntimeException("Exception: Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". utr5Count_from5 is now " + utr5Count_from5 + ", while parsedTranscriptRod.utr5Sequence.length() == " + parsedTranscriptRod.utr5Sequence.length() + ". This means parsedTranscriptRod.utr5Sequence isn't as long as it should be. This is a bug in handling this record: " + parsedTranscriptRod); - - } - } - - - //when in CDS, compute current codon - if(positionType == PositionType.CDS) - { - if(frame == 0) - { - currentCodon_5to3[0] = parsedTranscriptRod.cdsSequence.charAt( codingCoord_from5 - 1 ); //subtract 1 to go to zero-based coords - currentCodon_5to3[1] = parsedTranscriptRod.cdsSequence.charAt( codingCoord_from5 ); - currentCodon_5to3[2] = parsedTranscriptRod.cdsSequence.charAt( codingCoord_from5 + 1); - } - - //check for bad bases - if(!BaseUtils.isRegularBase(currentCodon_5to3[0]) || !BaseUtils.isRegularBase(currentCodon_5to3[1]) || !BaseUtils.isRegularBase(currentCodon_5to3[2])) { - logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() +". CDS codon contains irregular base:" + currentCodon_5to3[0] + currentCodon_5to3[1] + currentCodon_5to3[2]);// +". Transcript is: " + parsedTranscriptRod); - ++skippedPositionsCounter; - continue; - } - - } - - char haplotypeReference = parsedTranscriptRod.txSequence.charAt( txOffset_from5 - 1 ); - if(!positiveStrand) { - haplotypeReference = BaseUtils.simpleComplement(haplotypeReference); //txSequence contents depend on whether its +/- strand - } - char haplotypeReferenceStrandSpecific= positiveStrand ? haplotypeReference : BaseUtils.simpleComplement(haplotypeReference); - - - - if(!BaseUtils.isRegularBase(haplotypeReference) && haplotypeReference != '*') { //* is special case for mitochondrial genes where polyA tail completes the last codon - //check for bad bases - logger.debug("Skipping current position [" + parsedTranscriptRod.txChrom + ":" +txCoord_5to3 + "] in transcript " + parsedTranscriptRod.geneNames.toString() + ". The reference contains an irregular base:" + haplotypeReference); // +". Transcript is: " + parsedTranscriptRod); - ++skippedPositionsCounter; - continue; - } - - - char haplotypeAlternateStrandSpecific; - for(char haplotypeAlternate : ALLELES ) - { - haplotypeAlternateStrandSpecific= positiveStrand ? haplotypeAlternate : BaseUtils.simpleComplement(haplotypeAlternate); - outputLineFields.clear(); - - if(!isProteinCodingTranscript || isWithinIntronAndFarFromSpliceJunction) { - haplotypeReference = '*'; - haplotypeAlternate = '*'; - } - - //compute simple OUTPUT fields. - outputLineFields.put(GenomicAnnotation.CHR_COLUMN, parsedTranscriptRod.txChrom); - outputLineFields.put(GenomicAnnotation.START_COLUMN, String.valueOf(txCoord_5to3)); - outputLineFields.put(GenomicAnnotation.END_COLUMN, String.valueOf(txCoord_5to3)); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_REFERENCE_COLUMN, Character.toString( haplotypeReference ) ); - outputLineFields.put(GenomicAnnotation.HAPLOTYPE_ALTERNATE_COLUMN, Character.toString( haplotypeAlternate ) ); - for(int i = 0; i < GENE_NAME_COLUMNS.length; i++) { - outputLineFields.put(GENE_NAME_COLUMNS[i], parsedTranscriptRod.geneNames[i] ); - } - - outputLineFields.put(OUTPUT_POSITION_TYPE, positionType.toString() ); - outputLineFields.put(OUTPUT_TRANSCRIPT_STRAND, positiveStrand ? "+" : "-" ); - if(isWithinExon) { - outputLineFields.put(OUTPUT_MRNA_COORD, Integer.toString(mrnaCoord_from5) ); - } - outputLineFields.put(OUTPUT_SPLICE_DISTANCE, Integer.toString(distanceToNearestSpliceSite) ); - - //compute OUTPUT_SPLICE_INFO - final String spliceInfoString; - if(isWithin10bpOfSpliceJunction) { - if(distanceToNearestSpliceSite < 0) { - //is on the 5' side of the splice junction - if(isWithinExon) { - spliceInfoString = "splice-donor_" + distanceToNearestSpliceSite; - } else { - spliceInfoString = "splice-acceptor_" + distanceToNearestSpliceSite; - } - } else { - if(isWithinExon) { - spliceInfoString = "splice-acceptor_" + distanceToNearestSpliceSite; - } else { - spliceInfoString = "splice-donor_" + distanceToNearestSpliceSite; - } - } - outputLineFields.put(OUTPUT_SPLICE_INFO, spliceInfoString); - } - - //compute OUTPUT_IN_CODING_REGION - if(isProteinCodingTranscript) - { - outputLineFields.put(OUTPUT_IN_CODING_REGION, Boolean.toString(positionType == PositionType.CDS) ); - } - - - //compute OUTPUT_UORF_CHANGE - if(positionType == PositionType.utr5) - { - String refCodon1 = (Character.toString(utr5NucBuffer_5to3[0]) + Character.toString(utr5NucBuffer_5to3[1]) + utr5NucBuffer_5to3[2]).toUpperCase(); - String refCodon2 = (Character.toString(utr5NucBuffer_5to3[1]) + Character.toString(utr5NucBuffer_5to3[2]) + utr5NucBuffer_5to3[3]).toUpperCase(); - String refCodon3 = (Character.toString(utr5NucBuffer_5to3[2]) + Character.toString(utr5NucBuffer_5to3[3]) + utr5NucBuffer_5to3[4]).toUpperCase(); - - String varCodon1 = (Character.toString(utr5NucBuffer_5to3[0]) + Character.toString(utr5NucBuffer_5to3[1]) + haplotypeAlternateStrandSpecific).toUpperCase(); - String varCodon2 = (Character.toString(utr5NucBuffer_5to3[1]) + Character.toString(haplotypeAlternateStrandSpecific) + utr5NucBuffer_5to3[3]).toUpperCase(); - String varCodon3 = (Character.toString(haplotypeAlternateStrandSpecific) + Character.toString(utr5NucBuffer_5to3[3]) + utr5NucBuffer_5to3[4]).toUpperCase(); - - //check for +1 (eg. addition of new ATG uORF) and -1 (eg. disruption of existing ATG uORF) - String uORFChangeStr = null; - if( (refCodon1.equals("ATG") && !varCodon1.equals("ATG")) || - (refCodon2.equals("ATG") && !varCodon2.equals("ATG")) || - (refCodon3.equals("ATG") && !varCodon3.equals("ATG"))) - { - uORFChangeStr = "-1"; - } - else if((varCodon1.equals("ATG") && !refCodon1.equals("ATG")) || - (varCodon2.equals("ATG") && !refCodon2.equals("ATG")) || - (varCodon3.equals("ATG") && !refCodon3.equals("ATG"))) - { - uORFChangeStr = "+1"; - } - - outputLineFields.put(OUTPUT_UORF_CHANGE, uORFChangeStr ); - } - //compute CDS-specific fields - else if (positionType == PositionType.CDS) { - final String referenceCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2]; - final char temp = currentCodon_5to3[frame]; - currentCodon_5to3[frame] = haplotypeAlternateStrandSpecific; - final String variantCodon = Character.toString(currentCodon_5to3[0]) + Character.toString(currentCodon_5to3[1]) + currentCodon_5to3[2]; - currentCodon_5to3[frame] = temp; - - final AminoAcid refAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA(referenceCodon, codonCount_from5 == 1) : AminoAcidTable.getEukaryoticAA( referenceCodon ) ; - final AminoAcid variantAA = isMitochondrialTranscript ? AminoAcidTable.getMitochondrialAA(variantCodon, codonCount_from5 == 1) : AminoAcidTable.getEukaryoticAA( variantCodon ) ; - - if (refAA.isUnknown() || variantAA.isUnknown()) { - logger.warn("Illegal amino acid detected: refCodon=" + referenceCodon + " altCodon=" + variantCodon); - } - outputLineFields.put(OUTPUT_TRANSCRIPT_STRAND, positiveStrand ? "+" : "-" ); - outputLineFields.put(OUTPUT_FRAME, Integer.toString(frame)); - outputLineFields.put(OUTPUT_CODON_NUMBER, Integer.toString(codonCount_from5)); - outputLineFields.put(OUTPUT_REFERENCE_CODON, referenceCodon); - outputLineFields.put(OUTPUT_REFERENCE_AA, refAA.getCode()); - - outputLineFields.put(OUTPUT_VARIANT_CODON, variantCodon); - outputLineFields.put(OUTPUT_VARIANT_AA, variantAA.getCode()); - - outputLineFields.put(OUTPUT_PROTEIN_COORD_STR, "p." + refAA.getLetter() + Integer.toString(codonCount_from5) + variantAA.getLetter()); //for example: "p.K7$ - - boolean changesAA = !refAA.equals(variantAA); - outputLineFields.put(OUTPUT_CHANGES_AMINO_ACID, Boolean.toString(changesAA)); - final String functionalClass; - if (changesAA) { - if (variantAA.isStop()) { - functionalClass = "nonsense"; - } else if (refAA.isStop()) { - functionalClass = "readthrough"; - } else { - functionalClass = "missense"; - } - } else { - functionalClass = "silent"; - } - outputLineFields.put(OUTPUT_FUNCTIONAL_CLASS, functionalClass); - } - - //compute OUTPUT_CODING_COORD_STR - if(isProteinCodingTranscript) - { - //compute coding coord - final StringBuilder codingCoordStr = new StringBuilder(); - codingCoordStr.append( "c." ); - if(positionType == PositionType.utr3) { - codingCoordStr.append( '*' ); - } - - if(isWithinExon) { - codingCoordStr.append( Integer.toString(codingCoord_from5) ); - - codingCoordStr.append ( haplotypeReferenceStrandSpecific + ">" + haplotypeAlternateStrandSpecific); - } else { - //intronic coordinates - if(distanceToNearestSpliceSite < 0) { - codingCoordStr.append( Integer.toString(codingCoord_from5 + 1) ); - } else { - codingCoordStr.append( Integer.toString(codingCoord_from5 ) ); - codingCoordStr.append( "+" ); - } - - codingCoordStr.append( Integer.toString( distanceToNearestSpliceSite ) ); - } - - outputLineFields.put(OUTPUT_CODING_COORD_STR, codingCoordStr.toString()); - } - - - //generate the output line and add it to 'result' map. - if ( !isWithinIntronAndFarFromSpliceJunction ) - addThisLineToResult(outputLineFields); - - if( haplotypeAlternate == '*' ) { - //need only one record for this position with "*" for haplotypeAlternate, instead of the 4 individual alleles - break; - } - - } //ALLELE for-loop - } - finally - { - //increment coords - txOffset_from5++; - if(isWithinExon) { - mrnaCoord_from5++; - } - - if(positionType == PositionType.utr5) { - utr5Count_from5++; - } else if(positionType == PositionType.CDS) { - frame = (frame + 1) % 3; - if(frame == 0) { - codonCount_from5++; - } - } - } - } // l for-loop - - } //method close - - - /** - * Utility method. Creates a line containing the outputLineFields, and adds it to result, hashed by the sortKey. - * - * @param outputLineFields Column-name to value pairs. - */ - private void addThisLineToResult(final Map outputLineFields) { - final StringBuilder outputLine = new StringBuilder(); - for( final String column : outputColumnNames ) { - if(outputLine.length() != 0) { - outputLine.append( AnnotatorInputTableCodec.DELIMITER ); - } - final String value = outputLineFields.get(column); - if(value != null) { - outputLine.append(value); - } - } - - out.println(outputLine.toString()); - } - - public Integer reduce(Integer value, Integer sum) { return sum + value; } - - public void onTraversalDone(Integer result) { - logger.info("Skipped " + skippedPositionsCounter + " in-transcript genomic positions out of "+ totalPositionsCounter + " total (" + ( totalPositionsCounter == 0 ? 0 : (100*skippedPositionsCounter)/totalPositionsCounter) + "%)"); - logger.info("Skipped " + skippedTranscriptCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*skippedTranscriptCounter)/transcriptsProcessedCounter) + "%)"); - logger.info("Protein-coding transcripts (eg. with a CDS region) that don't start with Methionine or end in a stop codon: " + transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*transcriptsThatDontStartWithMethionineOrEndWithStopCodonCounter)/transcriptsProcessedCounter) + "%)"); - logger.info("Protein-coding transcripts (eg. with a CDS region) that don't start with Methionine: " + transcriptsThatDontStartWithMethionineCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*transcriptsThatDontStartWithMethionineCounter)/transcriptsProcessedCounter) + "%)"); - logger.info("Protein-coding transcripts (eg. with a CDS region) that don't end in a stop codon: " + transcriptsThatDontEndWithStopCodonCounter + " transcripts out of "+ transcriptsProcessedCounter + " total (" + ( transcriptsProcessedCounter == 0 ? 0 : (100*transcriptsThatDontEndWithStopCodonCounter)/transcriptsProcessedCounter) + "%)"); - } - - - /** - * Container for all data fields from a single row of the transcript table. - */ - protected static class TranscriptTableRecord - { - public static final String STRAND_COLUMN = "strand"; //eg. + - public static final String CDS_START_COLUMN = "cdsStart"; - public static final String CDS_END_COLUMN = "cdsEnd"; - public static final String EXON_COUNT_COLUMN = "exonCount"; - public static final String EXON_STARTS_COLUMN = "exonStarts"; - public static final String EXON_ENDS_COLUMN = "exonEnds"; - //public static final String EXON_FRAMES_COLUMN = "exonFrames"; - - - /** - * This StringBuffer accumulates the entire transcript sequence. - * This buffer is used instead of using the GATK window mechanism - * because arbitrary-length look-aheads and look-behinds are needed to deal - * with codons that span splice-junctions in + & - strand transcripts. - * The window mechanism requires hard-coding the window size, which would - * translate into a limit on maximum supported intron size. To avoid this, the - * sequence is accumulated as the transcript is scanned left-to-right. - * Then, all calculations are performed at the end. - */ - public StringBuilder txSequence; //the sequence of the entire transcript in order from 5' to 3' - public StringBuilder utr5Sequence; //the protein coding sequence (with introns removed) in order from 5' to 3' - public StringBuilder cdsSequence; //the protein coding sequence (with introns removed) in order from 5' to 3' - - public boolean positiveStrand; //whether the transcript is on the + or the - strand. - public String[] geneNames; //eg. NM_021649 - - public String txChrom; //The chromosome name - public int txStart; - public int txEnd; - - public int cdsStart; - public int cdsEnd; - - public int[] exonStarts; - public int[] exonEnds; - //public int[] exonFrames; - not used for anything, frame is computed another way - - /** - * Constructor. - * - * @param transcriptRod A rod representing a single record in the transcript table. - * @param geneNameColumns name columns. - */ - public TranscriptTableRecord(final AnnotatorInputTableFeature transcriptRod, String[] geneNameColumns) { - - //String binStr = transcriptRod.get("bin"); - //String idStr = transcriptRod.get("id"); //int(10) unsigned range Unique identifier ( usually 0 for some reason - even for translated ) - String strandStr = transcriptRod.getColumnValue(STRAND_COLUMN); - if(strandStr == null) { - throw new IllegalArgumentException("Transcript table record doesn't contain a 'strand' column. Make sure the transcripts input file has a header and the usual columns: \"" + strandStr + "\""); - } else if(strandStr.equals("+")) { - positiveStrand = true; - } else if(strandStr.equals("-")) { - positiveStrand = false; - } else { - throw new IllegalArgumentException("Transcript table record contains unexpected value for 'strand' column: \"" + strandStr + "\""); - } - - geneNames = new String[geneNameColumns.length]; - for(int i = 0; i < geneNameColumns.length; i++) { - geneNames[i] = transcriptRod.getColumnValue(geneNameColumns[i]); - } - - //String txStartStr = transcriptRod.get(TXSTART_COLUMN); //These fields were used to generate column 1 of the ROD file (eg. they got turned into chr:txStart-txStop) - //String txEndStr = transcriptRod.get(TXEND_COLUMN); - txChrom = transcriptRod.getChr(); - txStart = transcriptRod.getStart(); - txEnd = transcriptRod.getEnd(); - - String cdsStartStr = transcriptRod.getColumnValue(CDS_START_COLUMN); - String cdsEndStr = transcriptRod.getColumnValue(CDS_END_COLUMN); - - cdsStart = Integer.parseInt(cdsStartStr); - cdsEnd = Integer.parseInt(cdsEndStr); - - txSequence = new StringBuilder( (txEnd - txStart + 1) ); //the sequence of the entire transcript in order from 5' to 3' - if(isProteinCodingTranscript()) { - utr5Sequence = new StringBuilder( positiveStrand ? (cdsStart - txStart + 1) : (txEnd - cdsEnd + 1) ); //TODO reduce init size by size of introns - cdsSequence = new StringBuilder( (cdsEnd - cdsStart + 1) ); //TODO reduce init size by size of introns - } - - String exonCountStr = transcriptRod.getColumnValue(EXON_COUNT_COLUMN); - String exonStartsStr = transcriptRod.getColumnValue(EXON_STARTS_COLUMN); - String exonEndsStr = transcriptRod.getColumnValue(EXON_ENDS_COLUMN); - //String exonFramesStr = transcriptRod.get(EXON_FRAMES_COLUMN); - - String[] exonStartStrs = exonStartsStr.split(","); - String[] exonEndStrs = exonEndsStr.split(","); - //String[] exonFrameStrs = exonFramesStr.split(","); - - int exonCount = Integer.parseInt(exonCountStr); - if(exonCount != exonStartStrs.length || exonCount != exonEndStrs.length /* || exonCount != exonFrameStrs.length */) - { - throw new RuntimeException("exonCount != exonStarts.length || exonCount != exonEnds.length || exonCount != exonFrames.length. Exon starts: " + exonStartsStr + ", Exon ends: " + exonEndsStr + /*", Exon frames: " + exonFramesStr + */", Exon count: " + exonCountStr +". transcriptRod = " + transcriptRod); - } - - exonStarts = new int[exonCount]; - exonEnds = new int[exonCount]; - //exonFrames = new int[exonCount]; - for(int i = 0; i < exonCount; i++) { - exonStarts[i] = Integer.parseInt(exonStartStrs[i]); - exonEnds[i] = Integer.parseInt(exonEndStrs[i]); - //exonFrames[i] = Integer.parseInt(exonFrameStrs[i]); - } - } - - - /** - * Takes a genomic position on the same contig as the transcript, and - * returns true if this position falls within an exon. - */ - public boolean isWithinExon(final int genomPosition) { - for(int i = 0; i < exonStarts.length; i++) { - final int curStart = exonStarts[i]; - if(genomPosition < curStart) { - return false; - } - final int curStop = exonEnds[i]; - if(genomPosition <= curStop) { - return true; - } - } - - return false; - } - - /** - * Computes the distance to the nearest splice-site. - * The returned value is negative its on the 5' side (eg. upstream) of the juntion, and - * positive if its on the 3' side. - */ - public int computeDistanceToNearestSpliceSite(final int genomPosition) { - int prevDistance = Integer.MAX_VALUE; - for(int i = 0; i < exonStarts.length; i++) { - final int curStart = exonStarts[i]; - int curDistance = curStart - genomPosition; - if(genomPosition < curStart) { - //position is within the current intron - if(prevDistance < curDistance) { - return positiveStrand ? prevDistance : -prevDistance; - } else { - return positiveStrand ? -curDistance : curDistance; - } - } else { - prevDistance = genomPosition - curStart + 1; - } - - final int curStop = exonEnds[i]; - curDistance = curStop - genomPosition + 1; - if(genomPosition <= curStop) { - //position is within an exon - if(prevDistance < curDistance) { - return positiveStrand ? prevDistance : -prevDistance; - } else { - return positiveStrand ? -curDistance : curDistance; - } - } else { - prevDistance = genomPosition - curStop; - } - } - - throw new IllegalArgumentException("Genomic position: [" + genomPosition +"] not found within transcript: " + this +". " + - "This method should not have been called for this position. NOTE: this method assumes that all transcripts start " + - "with an exon and end with an exon (rather than an intron). Is this wrong?"); - //return prevDistance; //out of exons. return genomPosition-curStop - } - - - /** - * Returns true if this is a coding transcript (eg. is translated - * into proteins). Returns false for non-coding RNA. - */ - public boolean isProteinCodingTranscript() { - return cdsStart < cdsEnd; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("chrpos=" + txChrom + ':' + txStart + '-' + txEnd + ", strand=" + (positiveStrand ? '+':'-') + ", gene-names=" + Arrays.toString(geneNames) + ", cds="+ cdsStart + '-' + cdsEnd + ", exonStarts=" + Arrays.toString(exonStarts) + ", exonEnds=" + Arrays.toString(exonEnds)); - return sb.toString(); - } - - - - /** - * Computes the coding coord of the 1st nucleotide in the transcript. - * If the 1st nucleotide is in the 5'utr, the returned value will be negative. - * Otherwise (if the 1st nucleotide is CDS), the returned value is 1. - */ - public int computeInitialCodingCoord() { - if(!isProteinCodingTranscript()) { - throw new ReviewedStingException("This method should only be called for protein-coding transcripts"); - } - - if(positiveStrand) - { - if( cdsStart == exonStarts[0] ) { - //the 1st nucleotide of the transcript is CDS. - return 1; - } - - int result = 0; - for(int i = 0; i < exonStarts.length; i++) - { - final int exonStart = exonStarts[i]; - final int exonEnd = exonEnds[i]; - if(cdsStart <= exonEnd) { //eg. exonEnd is now on the 3' side of cdsStart - //this means cdsStart is within the current exon - result += (cdsStart - exonStart) + 1; - break; - } else { - //cdsStart is downstream of the current exon - result += (exonEnd - exonStart) + 1; - } - } - return -result; //negate because 5' UTR coding coord is negative - } - else //(negative strand) - { - final int cdsStart_5prime = cdsEnd; - if(cdsStart_5prime == exonEnds[exonEnds.length - 1]) { - //the 1st nucleotide of the transcript is CDS. - return 1; - } - - int result = 0; - for(int i = exonEnds.length - 1; i >= 0; i--) - { - final int exonStart = exonEnds[i]; //when its the negative strand, the 5' coord of the 1st exon is exonEnds[i] - final int exonEnd = exonStarts[i]; - if( exonEnd <= cdsStart_5prime ) { //eg. exonEnd is now on the 3' side of cdsStart - //this means cdsStart is within the current exon - result += -(cdsStart_5prime - exonStart) + 1; - break; - } else { - //cdsStart is downstream of the current exon - result += -(exonEnd - exonStart) + 1; - } - } - return -result; //negate because 5' UTR coding coord is negative - } - } - } - - -} - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index a10897172..3fcf87bd5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -255,7 +255,7 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getBasePileup(); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); - vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc).iterator().next(); + vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc); } return new VariantCallContext(vc, ref.getBase(), false); @@ -436,8 +436,7 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getBasePileup(); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); - Collection variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); - vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element. + vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); } VariantCallContext call = new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java deleted file mode 100755 index 9aa370d3f..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ /dev/null @@ -1,890 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.phasing; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.AminoAcid; -import org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator.AminoAcidTable; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.util.*; - -import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFromRods; - - -/** - * Walks along all variant ROD loci, and dynamically annotates alleles at MNP records. - */ -@Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}, referenceMetaData = {@RMD(name = AnnotateMNPsWalker.REFSEQ_ROD_NAME, type = AnnotatorInputTableFeature.class), @RMD(name = AnnotateMNPsWalker.VARIANT_ROD_NAME, type = ReferenceOrderedDatum.class)}) - -public class AnnotateMNPsWalker extends RodWalker { - - @Output(doc = "File to which variants should be written", required = true) - protected VCFWriter writer = null; - private ManualSortingVCFWriter sortingWriter = null; - - @Argument(fullName = "emitOnlyMNPs", shortName = "emitOnlyMNPs", doc = "Only output MNP records; [default:false]", required = false) - protected boolean emitOnlyMNPs = false; - - private LinkedList rodNames = null; - private GenomeLocParser locParser = null; - private TreeMap> MNPstartToStops = null; // Must be TreeMap sorted by START sites! - - public final static String REFSEQ_ROD_NAME = "refseq"; - public final static String VARIANT_ROD_NAME = "variant"; - - private LocusToFeatures locusToRefSeqFeatures = null; - - - protected final static String MNP_ANNOTATION_KEY_PREFIX = "MNP.refseq."; - - protected final static String REFSEQ_NAME = "name"; - protected final static String REFSEQ_NAME2 = "name2"; - - protected final static String REFSEQ_POSITION_TYPE = "positionType"; - protected final static String REFSEQ_CDS = "CDS"; - - protected final static String REFSEQ_STRAND = "transcriptStrand"; - protected final static String REFSEQ_POS_STRAND = "+"; - protected final static String REFSEQ_NEG_STRAND = "-"; - - protected final static String REFSEQ_CODON_COORD = "codonCoord"; - protected final static String REFSEQ_CODING_FRAME = "frame"; - - protected final static String REFSEQ_REF_CODON = "referenceCodon"; - protected final static String REFSEQ_REF_AA = "referenceAA"; - - protected final static String REFSEQ_ALT_BASE = "haplotypeAlternate"; - - protected final static String REFSEQ_VARIANT_CODON = "variantCodon"; - protected final static String REFSEQ_VARIANT_AA = "variantAA"; - protected final static String REFSEQ_CHANGES_AA = "changesAA"; - protected final static String REFSEQ_FUNCTIONAL_CLASS = "functionalClass"; - protected final static String REFSEQ_PROTEIN_COORD_DESCRIPTION = "proteinCoordStr"; - - protected final static String REFSEQ_CODING_ANNOTATIONS = "codingVariants"; - protected final static String REFSEQ_NUM_AA_CHANGES = "numAAchanges"; - protected final static String REFSEQ_HAS_MULT_AA_CHANGES = "alleleHasMultAAchanges"; - - public void initialize() { - rodNames = new LinkedList(); - rodNames.add(VARIANT_ROD_NAME); - - locParser = getToolkit().getGenomeLocParser(); - MNPstartToStops = new TreeMap>(); // sorted by start sites - - initializeVcfWriter(); - - locusToRefSeqFeatures = new LocusToFeatures(); - } - - private void initializeVcfWriter() { - sortingWriter = new ManualSortingVCFWriter(writer); - writer = sortingWriter; - - // setup the header fields: - Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - - Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); - writer.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples()))); - } - - public boolean generateExtendedEvents() { - return false; - } - - public Integer reduceInit() { - return 0; - } - - /** - * For each site of interest, annotate it if it's a MNP. - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return count of MNPs observed - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if (tracker == null) - return null; - - int numMNPsObserved = 0; - GenomeLoc curLocus = ref.getLocus(); - clearOldLocusFeatures(curLocus); - - boolean requireStartHere = false; // see EVERY site of the MNP - boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) { - GenomeLoc vcLoc = VariantContextUtils.getLocation(locParser, vc); - boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart(); - boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop(); - - if (vc.isMNP()) { - logger.debug("Observed MNP at " + vcLoc); - - if (isChrM(vc)) { - if (atStartOfVc) { - logger.warn("Skipping mitochondrial MNP at " + vcLoc + " due to complexity of coding table [need to know if first codon, etc.]..."); - writeVCF(vc); - } - continue; - } - - GenomeLoc stopLoc = locParser.createGenomeLoc(curLocus.getContig(), vcLoc.getStop()); - final List refSeqRODs = tracker.getReferenceMetaData(REFSEQ_ROD_NAME); - for (Object refSeqObject : refSeqRODs) { - AnnotatorInputTableFeature refSeqAnnotation = (AnnotatorInputTableFeature) refSeqObject; - locusToRefSeqFeatures.putLocusFeatures(curLocus, refSeqAnnotation, stopLoc); - } - - if (atStartOfVc) { // MNP is starting here, so register that we're waiting for it - Set stopLocs = MNPstartToStops.get(curLocus); - if (stopLocs == null) { - stopLocs = new HashSet(); - MNPstartToStops.put(curLocus, stopLocs); - } - stopLocs.add(stopLoc); - } - - if (atEndOfVc) { - numMNPsObserved++; // only count a MNP at its stop site - logger.debug("Observed end of MNP at " + curLocus); - logger.debug("Current list of per-locus features\n" + locusToRefSeqFeatures); - - Map MNPannotations = annotateMNP(vc); - MNPannotations.putAll(RefSeqDataParser.removeRefSeqAttributes(vc.getAttributes())); // remove any RefSeq INFO, since adding it in more thoroughly here - vc = VariantContext.modifyAttributes(vc, MNPannotations); - writeVCF(vc); - - GenomeLoc startLoc = locParser.createGenomeLoc(curLocus.getContig(), vcLoc.getStart()); - Set stopLocs = MNPstartToStops.get(startLoc); - if (stopLocs != null) { // otherwise, just removed stopLocs due to another MNP that has the same (start, stop) - stopLocs.remove(stopLoc); - if (stopLocs.isEmpty()) // no longer waiting for startLoc - MNPstartToStops.remove(startLoc); - } - } - } - else if (atStartOfVc && !emitOnlyMNPs) {// only want to write other VariantContexts records once (where they start): - writeVCF(vc); - } - } - - Integer mostUpstreamWritableLoc = null; - if (!MNPstartToStops.isEmpty()) { - GenomeLoc waitingForLoc = MNPstartToStops.entrySet().iterator().next().getKey(); - mostUpstreamWritableLoc = waitingForLoc.getStart() - 1; - } - sortingWriter.setmostUpstreamWritableLocus(mostUpstreamWritableLoc); - - return numMNPsObserved; - } - - private static boolean isChrM(final VariantContext vc) { - return vc.getChr().equals("chrM") || vc.getChr().equals("MT"); - } - - private Map annotateMNP(VariantContext vc) { - Map annotations = new HashMap(); - - RefSeqNameToFeatures nameToPositionalFeatures = new RefSeqNameToFeatures(vc); - MNPannotationKeyBuilder kb = new MNPannotationKeyBuilder(nameToPositionalFeatures); - - for (Map.Entry nameToFeatureEntry : nameToPositionalFeatures.entrySet()) { - String featureName = nameToFeatureEntry.getKey(); - RefSeqFeatureList feature = nameToFeatureEntry.getValue(); - CodonAnnotationsForAltAlleles codonAnnotationsForAlleles = new CodonAnnotationsForAltAlleles(vc, feature); - - annotations.put(kb.getKey(REFSEQ_CODING_ANNOTATIONS), codonAnnotationsForAlleles.getCodonAnnotationsString()); - annotations.put(kb.getKey(REFSEQ_NUM_AA_CHANGES), codonAnnotationsForAlleles.getNumAAchangesString()); - annotations.put(kb.getKey(REFSEQ_HAS_MULT_AA_CHANGES), codonAnnotationsForAlleles.hasAlleleWithMultipleAAchanges); - annotations.put(kb.getKey(REFSEQ_NAME), featureName); - annotations.put(kb.getKey(REFSEQ_NAME2), feature.name2); - annotations.put(kb.getKey(REFSEQ_POSITION_TYPE), REFSEQ_CDS); - annotations.put(kb.getKey(REFSEQ_STRAND), (feature.positiveStrand ? REFSEQ_POS_STRAND : REFSEQ_NEG_STRAND)); - annotations.put(kb.getKey(REFSEQ_CODON_COORD), feature.getCodonCoordString()); - - kb.incrementFeatureIndex(); - } - - return annotations; - } - - private static class MNPannotationKeyBuilder { - private int featureIndex; - private boolean multipleEntries; - - public MNPannotationKeyBuilder(RefSeqNameToFeatures nameToPositionalFeatures) { - this.featureIndex = 1; - this.multipleEntries = nameToPositionalFeatures.nameToFeatures.size() > 1; - } - - public void incrementFeatureIndex() { - featureIndex++; - } - - public String getKey(String type) { - String annotationKey = MNP_ANNOTATION_KEY_PREFIX + type; - if (multipleEntries) - annotationKey += "_" + featureIndex; - return annotationKey; - } - } - - private static byte[] ByteArrayToPrimitive(Byte[] nonNullArray) { - byte[] primArray = new byte[nonNullArray.length]; - - for (int i = 0; i < nonNullArray.length; i++) { - if (nonNullArray[i] == null) - throw new ReviewedStingException("nonNullArray[i] == null"); - primArray[i] = nonNullArray[i]; - } - - return primArray; - } - - private void clearOldLocusFeatures(GenomeLoc curLoc) { - Iterator> locusFeaturesIt = locusToRefSeqFeatures.entrySet().iterator(); - while (locusFeaturesIt.hasNext()) { - Map.Entry locusFeaturesEntry = locusFeaturesIt.next(); - if (curLoc.isPast(locusFeaturesEntry.getValue().getFurthestLocusUsingFeatures())) - locusFeaturesIt.remove(); - } - } - - public Integer reduce(Integer count, Integer total) { - if (count != null) - total = total + count; - - return total; - } - - /** - * @param result the number of MNPs processed. - */ - public void onTraversalDone(Integer result) { - System.out.println("Number of MNPs observed: " + result); - writer.close(); - } - - private void writeVCF(VariantContext vc) { - WriteVCF.writeVCF(vc, writer, logger); - } - - /* - Inner classes: - */ - - // Maps: RefSeq entry name -> features for ALL positions of a particular VariantContext MNP: - - private class RefSeqNameToFeatures { - private Map nameToFeatures; - - public RefSeqNameToFeatures(VariantContext vc) { - this.nameToFeatures = new HashMap(); - - int MNPstart = vc.getStart(); - int MNPstop = vc.getEnd(); - int MNPlength = MNPstop - MNPstart + 1; - - for (int i = 0; i < MNPlength; i++) { - int genomicPosition = MNPstart + i; - GenomeLoc posLoc = locParser.createGenomeLoc(vc.getChr(), genomicPosition); - - PositionalRefSeqFeatures locFeatures = locusToRefSeqFeatures.getLocusFeatures(posLoc); - if (locFeatures == null) // no features for posLoc - continue; - - for (Map.Entry nameToFeatureEntry : locFeatures.entrySet()) { - String name = nameToFeatureEntry.getKey(); - PositionalRefSeqFeature posFeature = nameToFeatureEntry.getValue(); - - RefSeqFeatureList featureList = nameToFeatures.get(name); - if (featureList == null) { - featureList = new RefSeqFeatureList(MNPlength); - nameToFeatures.put(name, featureList); - } - featureList.updateFeatureAtPosition(i, posFeature); - } - } - } - - public Set> entrySet() { - return nameToFeatures.entrySet(); - } - } - - // For a particular RefSeq entry, contains the features for ALL positions of a particular VariantContext MNP - - private static class RefSeqFeatureList { - private final static String CODON_FRAME_START = "("; - private final static String CODON_FRAME_END = ")"; - private final static String CODON_DELIM = "|"; - - private CodingRefSeqFeature[] refSeqFeatures; - private String name2; - private Boolean positiveStrand; - - private Map> codonToIndices; // Map of: codon index -> MNP indices that refer to codon - - public RefSeqFeatureList(int MNPlength) { - this.refSeqFeatures = new CodingRefSeqFeature[MNPlength]; - for (int i = 0; i < MNPlength; i++) - this.refSeqFeatures[i] = null; - - this.name2 = null; - this.positiveStrand = null; - this.codonToIndices = new TreeMap>(); - } - - public void updateFeatureAtPosition(int index, PositionalRefSeqFeature feature) { - if (name2 == null) { - name2 = feature.name2; - positiveStrand = feature.positiveStrand; - } - else if (!name2.equals(feature.name2) || positiveStrand != feature.positiveStrand) { - throw new UserException("Inconsistency between previous RefSeq entry and: " + feature); - } - - CodingRefSeqFeature crsf = new CodingRefSeqFeature(feature); - refSeqFeatures[index] = crsf; - - List indicesWithCodon = codonToIndices.get(crsf.codonCoord); - if (indicesWithCodon == null) { - indicesWithCodon = new LinkedList(); - codonToIndices.put(crsf.codonCoord, indicesWithCodon); - } - indicesWithCodon.add(index); - } - - public Set>> codonIndicesEntrySet() { - return codonToIndices.entrySet(); - } - - public String getCodonCoordString() { - StringBuilder sb = new StringBuilder(); - - for (int i = 0; i < refSeqFeatures.length; i++) { - CodingRefSeqFeature crsf = refSeqFeatures[i]; - if (crsf != null) - sb.append(crsf.codonCoord).append(CODON_FRAME_START).append(crsf.codingFrame).append(CODON_FRAME_END); - if (i < refSeqFeatures.length - 1) - sb.append(CODON_DELIM); - } - - return sb.toString(); - } - } - - private static class CodingRefSeqFeature { - protected int codonCoord; - protected int codingFrame; - protected String referenceCodon; - protected String referenceAA; - - public CodingRefSeqFeature(PositionalRefSeqFeature feature) { - this.codonCoord = feature.codonCoord; - this.codingFrame = feature.codingFrame; - this.referenceCodon = feature.referenceCodon.toUpperCase(); - this.referenceAA = feature.referenceAA; - } - } - - private static class CodonAnnotationsForAltAlleles { - protected final static int MIN_CODON_INDEX = 0; - protected final static int NUM_CODON_INDICES = 3; - private final static String CODON_ANNOTATION_DELIM = ","; - - private List alleleAnnotations; - private int[] alleleToNumAAchanges; - private boolean hasAlleleWithMultipleAAchanges; - - public CodonAnnotationsForAltAlleles(VariantContext vc, RefSeqFeatureList feature) { - this.alleleAnnotations = new LinkedList(); - - Set altAlleles = vc.getAlternateAlleles(); - int numAltAlleles = altAlleles.size(); - this.alleleToNumAAchanges = new int[numAltAlleles]; - for (int i = 0; i < numAltAlleles; i++) - this.alleleToNumAAchanges[i] = 0; - - int MNPstart = vc.getStart(); - int MNPstop = vc.getEnd(); - int MNPlength = MNPstop - MNPstart + 1; - - for (Map.Entry> codonToIndicesEntry : feature.codonIndicesEntrySet()) { - int codonIndex = codonToIndicesEntry.getKey(); - List indices = codonToIndicesEntry.getValue(); - if (indices.isEmpty()) - throw new ReviewedStingException("indices should not exist if it's empty!"); - - for (int index : indices) { - int frame = feature.refSeqFeatures[index].codingFrame; - if (feature.refSeqFeatures[index].codonCoord != codonIndex) - throw new ReviewedStingException("LOGICAL ERROR: feature.refSeqFeatures[index].codonCoord != codonIndex"); - if (frame < MIN_CODON_INDEX || frame >= NUM_CODON_INDICES) - throw new UserException("RefSeq codon frame not one of {0,1,2}"); - } - CodingRefSeqFeature firstFeatureForCodon = feature.refSeqFeatures[indices.get(0)]; - String refCodon = firstFeatureForCodon.referenceCodon; - - SingleCodonAnnotationsForAlleles codonAnnotation = new SingleCodonAnnotationsForAlleles(codonIndex, altAlleles, MNPlength, refCodon, firstFeatureForCodon, indices, feature); - alleleAnnotations.add(codonAnnotation); - - // From a single codon, summarize the data for ALL alleles: - for (int i = 0; i < numAltAlleles; i++) { - if (codonAnnotation.annotationsForAlleles[i].codonFunc.changesAA) { - alleleToNumAAchanges[i]++; - if (alleleToNumAAchanges[i] > 1) - this.hasAlleleWithMultipleAAchanges = true; - } - } - } - } - - public String getCodonAnnotationsString() { - StringBuilder sb = new StringBuilder(); - - int index = 0; - for (SingleCodonAnnotationsForAlleles codonToAlleles : alleleAnnotations) { - sb.append(codonToAlleles); - if (index < alleleAnnotations.size() - 1) - sb.append(CODON_ANNOTATION_DELIM); - index++; - } - - return sb.toString(); - } - - public String getNumAAchangesString() { - StringBuilder sb = new StringBuilder(); - - for (int index = 0; index < alleleToNumAAchanges.length; index++) { - sb.append(alleleToNumAAchanges[index]); - if (index < alleleToNumAAchanges.length - 1) - sb.append(SingleCodonAnnotationsForAlleles.ALLELE_ANNOTATION_DELIM); - } - - return sb.toString(); - } - } - - private static class SingleCodonAnnotationsForAlleles { - private final static String CODON_MAP_SYMBOL = "->"; - private final static String CODON_ANNOTATION_START = "["; - private final static String CODON_ANNOTATION_END = "]"; - private final static String REF_CODON_INFO_DELIM = "|"; - private final static String ALLELE_ANNOTATION_DELIM = ","; - private final static String ASSIGNMENT = ":"; - - private int codonIndex; - private String refCodon; - private String refAA; - - private SingleCodonAnnotationsForAllele[] annotationsForAlleles; - - public SingleCodonAnnotationsForAlleles(int codonIndex, Collection altAlleles, int MNPlength, String refCodon, CodingRefSeqFeature firstFeatureForCodon, List indices, RefSeqFeatureList feature) { - if (refCodon.length() != CodonAnnotationsForAltAlleles.NUM_CODON_INDICES) - throw new UserException("RefSeq reference codon " + refCodon + " is not of length " + CodonAnnotationsForAltAlleles.NUM_CODON_INDICES); - - AminoAcid refAA = AminoAcidTable.getEukaryoticAA(refCodon); - if (!refAA.getCode().equals(firstFeatureForCodon.referenceAA)) - throw new UserException("RefSeq: translated reference codon= " + refAA + " != " + firstFeatureForCodon.referenceAA + " = reference AA"); - - this.codonIndex = codonIndex; - this.refCodon = refCodon; - this.refAA = refAA.getCode(); - this.annotationsForAlleles = new SingleCodonAnnotationsForAllele[altAlleles.size()]; - - int altInd = 0; - for (Allele altAllele : altAlleles) { - if (altAllele.length() != MNPlength) - throw new ReviewedStingException("length(altAllele) != length(MNP)"); - byte[] altBases = altAllele.getBases(); - - Byte[] variantCodonArr = new Byte[CodonAnnotationsForAltAlleles.NUM_CODON_INDICES]; - for (int i = CodonAnnotationsForAltAlleles.MIN_CODON_INDEX; i < CodonAnnotationsForAltAlleles.NUM_CODON_INDICES; i++) - variantCodonArr[i] = null; - - for (int index : indices) { - int frame = feature.refSeqFeatures[index].codingFrame; - if (variantCodonArr[frame] != null) - throw new UserException("RefSeq assigns codon " + codonIndex + " twice at same frame: " + frame); - - byte base = altBases[index]; - if (!feature.positiveStrand) // negative strand codon - base = BaseUtils.simpleComplement(base); - - variantCodonArr[frame] = base; - } - - /* For missing frames, there MUST exist AT LEAST one index that refers to this codon, - so use it to derive the missing bases [ALREADY complemented if on the negative strand]: - */ - for (int frame = CodonAnnotationsForAltAlleles.MIN_CODON_INDEX; frame < CodonAnnotationsForAltAlleles.NUM_CODON_INDICES; frame++) { - if (variantCodonArr[frame] == null) - variantCodonArr[frame] = (byte) refCodon.charAt(frame); - } - String variantCodon = new String(ByteArrayToPrimitive(variantCodonArr)).toUpperCase(); - - SingleCodonAnnotationsForAllele alleleAnnotation = new SingleCodonAnnotationsForAllele(variantCodon, refCodon, refAA, codonIndex); - annotationsForAlleles[altInd] = alleleAnnotation; - altInd++; - } - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append(codonIndex).append(CODON_MAP_SYMBOL).append(CODON_ANNOTATION_START); - sb.append(REFSEQ_REF_CODON).append(ASSIGNMENT).append(refCodon).append(REF_CODON_INFO_DELIM); - sb.append(REFSEQ_REF_AA).append(ASSIGNMENT).append(refAA).append(REF_CODON_INFO_DELIM); - - int index = 0; - for (SingleCodonAnnotationsForAllele annotation : annotationsForAlleles) { - sb.append(annotation); - if (index < annotationsForAlleles.length - 1) - sb.append(ALLELE_ANNOTATION_DELIM); - index++; - } - sb.append(CODON_ANNOTATION_END); - - return sb.toString(); - } - } - - private static class SingleCodonAnnotationsForAllele { - private final static String ALLELE_START = "{"; - private final static String ALLELE_END = "}"; - private final static String CODON_INFO_DELIM = "|"; - private final static String ASSIGNMENT = ":"; - private final static String MNP_DEPENDENT_AA = "MNPdependentAA"; - - private CodonFunction codonFunc; - private String proteinCoordStr; - private boolean MNPdependentAA; - private String originalAA; - - public SingleCodonAnnotationsForAllele(String variantCodon, String refCodon, AminoAcid refAA, int codonIndex) { - this.codonFunc = new CodonFunction(variantCodon, refCodon, refAA); - this.proteinCoordStr = "p." + refAA.getLetter() + codonIndex + codonFunc.variantAA.getLetter(); - - int refCodonLength = refCodon.length(); - if (codonFunc.variantCodon.length() != refCodonLength) - throw new ReviewedStingException("codonFunc.variantCodon.length() != refCodonLength, but ALREADY checked that they're both 3"); - - this.MNPdependentAA = true; - this.originalAA = "("; - for (int i = 0; i < refCodonLength; i++) { - // Take [0,i-1] and [i+1, end] from refCodon, and i from variantCodon: - String singleBaseChangeCodon = refCodon.substring(0, i) + variantCodon.substring(i, i+1) + refCodon.substring(i+1, refCodonLength); - CodonFunction singleBaseChangeCodonFunc = new CodonFunction(singleBaseChangeCodon, refCodon, refAA); - if (singleBaseChangeCodonFunc.variantAA.equals(codonFunc.variantAA)) { - this.MNPdependentAA = false; - this.originalAA = ""; - break; - } - - this.originalAA = this.originalAA + "" + singleBaseChangeCodonFunc.variantAA.getLetter(); - if (i < refCodonLength - 1) - this.originalAA = this.originalAA + ","; - } - - if (this.MNPdependentAA) - this.originalAA = this.originalAA + ")"; - } - - private static class CodonFunction { - private String variantCodon; - private AminoAcid variantAA; - private boolean changesAA; - private String functionalClass; - - public CodonFunction(String variantCodon, String refCodon, AminoAcid refAA) { - this.variantCodon = variantCodon; - this.variantAA = AminoAcidTable.getEukaryoticAA(this.variantCodon); - this.changesAA = !refAA.equals(variantAA); - - if (!this.variantCodon.equals(refCodon)) { - if (changesAA) { - if (variantAA.isStop()) { - functionalClass = "nonsense"; - } - else if (refAA.isStop()) { - functionalClass = "readthrough"; - } - else { - functionalClass = "missense"; - } - } - else { // the same aa: - functionalClass = "silent"; - } - } - else { // the same codon: - functionalClass = "no_change"; - } - } - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - - sb.append(ALLELE_START); - sb.append(REFSEQ_VARIANT_CODON).append(ASSIGNMENT).append(codonFunc.variantCodon).append(CODON_INFO_DELIM); - sb.append(REFSEQ_VARIANT_AA).append(ASSIGNMENT).append(codonFunc.variantAA.getCode()).append(CODON_INFO_DELIM); - sb.append(REFSEQ_CHANGES_AA).append(ASSIGNMENT).append(codonFunc.changesAA).append(CODON_INFO_DELIM); - sb.append(REFSEQ_FUNCTIONAL_CLASS).append(ASSIGNMENT).append(codonFunc.functionalClass).append(CODON_INFO_DELIM); - sb.append(REFSEQ_PROTEIN_COORD_DESCRIPTION).append(ASSIGNMENT).append(proteinCoordStr).append(CODON_INFO_DELIM); - sb.append(MNP_DEPENDENT_AA).append(ASSIGNMENT).append(MNPdependentAA).append(originalAA); - sb.append(ALLELE_END); - - return sb.toString(); - } - } -} - - -// External classes: - -class LocusToFeatures { - private Map locusToFeatures; - - public LocusToFeatures() { - this.locusToFeatures = new TreeMap(); - } - - public PositionalRefSeqFeatures getLocusFeatures(GenomeLoc loc) { - return locusToFeatures.get(loc); - } - - public void putLocusFeatures(GenomeLoc loc, AnnotatorInputTableFeature refSeqAnnotation, GenomeLoc locusUsingThis) { - PositionalRefSeqFeatures locFeatures = locusToFeatures.get(loc); - if (locFeatures == null) { - locFeatures = new PositionalRefSeqFeatures(locusUsingThis); - locusToFeatures.put(loc, locFeatures); - } - locFeatures.putFeature(refSeqAnnotation, locusUsingThis); - } - - public Set> entrySet() { - return locusToFeatures.entrySet(); - } - - public String toString() { // INTERNAL use only - StringBuilder sb = new StringBuilder(); - - for (Map.Entry locFeatures : entrySet()) { - GenomeLoc loc = locFeatures.getKey(); - PositionalRefSeqFeatures features = locFeatures.getValue(); - sb.append("Locus: ").append(loc).append("\n").append(features); - } - - return sb.toString(); - } -} - -class PositionalRefSeqFeatures { - private final static String[] REQUIRE_COLUMNS = - {AnnotateMNPsWalker.REFSEQ_NAME, AnnotateMNPsWalker.REFSEQ_POSITION_TYPE}; - - private Map nameToFeature; - private GenomeLoc furthestLocusUsingFeatures; - - public PositionalRefSeqFeatures(GenomeLoc locusUsingThis) { - this.nameToFeature = new HashMap(); - this.furthestLocusUsingFeatures = locusUsingThis; - } - - public void putFeature(AnnotatorInputTableFeature refSeqAnnotation, GenomeLoc locusUsingThis) { - for (String column : REQUIRE_COLUMNS) { - if (!refSeqAnnotation.containsColumnName(column)) - throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + column); - } - - if (locusUsingThis.isPast(furthestLocusUsingFeatures)) - furthestLocusUsingFeatures = locusUsingThis; - - String posType = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_POSITION_TYPE); - if (!posType.equals(AnnotateMNPsWalker.REFSEQ_CDS)) // only interested in coding sequence annotations - return; - - PositionalRefSeqFeature newLocusFeature = new PositionalRefSeqFeature(refSeqAnnotation); - - String refSeqName = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_NAME); - PositionalRefSeqFeature locusFeature = nameToFeature.get(refSeqName); - if (locusFeature == null) { - locusFeature = newLocusFeature; - nameToFeature.put(refSeqName, locusFeature); - } - else if (!locusFeature.equals(newLocusFeature)) { - throw new UserException("Inconsistency between previous RefSeq entry and: " + refSeqAnnotation); - } - - locusFeature.updateFeature(refSeqAnnotation); - } - - public GenomeLoc getFurthestLocusUsingFeatures() { - return furthestLocusUsingFeatures; - } - - public Set> entrySet() { - return nameToFeature.entrySet(); - } - - public String toString() { // INTERNAL use only - StringBuilder sb = new StringBuilder(); - - for (Map.Entry nameFeatureEntry : entrySet()) { - String name = nameFeatureEntry.getKey(); - PositionalRefSeqFeature feature = nameFeatureEntry.getValue(); - sb.append(name).append(" -> [").append(feature).append("]\n"); - } - - return sb.toString(); - } -} - -class PositionalRefSeqFeature { - private final static String[] REQUIRE_COLUMNS = - {AnnotateMNPsWalker.REFSEQ_NAME2, AnnotateMNPsWalker.REFSEQ_STRAND, - AnnotateMNPsWalker.REFSEQ_CODON_COORD, AnnotateMNPsWalker.REFSEQ_CODING_FRAME, - AnnotateMNPsWalker.REFSEQ_REF_CODON, AnnotateMNPsWalker.REFSEQ_REF_AA}; - - protected String name2; - protected boolean positiveStrand; - protected int codonCoord; - protected int codingFrame; - protected String referenceCodon; - protected String referenceAA; - - private Map baseToAnnotations; - - public PositionalRefSeqFeature(AnnotatorInputTableFeature refSeqAnnotation) { - for (String column : REQUIRE_COLUMNS) { - if (!refSeqAnnotation.containsColumnName(column)) - throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + column); - } - this.name2 = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_NAME2); - this.positiveStrand = (refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_STRAND).equals(AnnotateMNPsWalker.REFSEQ_POS_STRAND)); - this.codonCoord = Integer.parseInt(refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_CODON_COORD)); - this.codingFrame = Integer.parseInt(refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_CODING_FRAME)); - this.referenceCodon = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_REF_CODON); - this.referenceAA = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_REF_AA); - - this.baseToAnnotations = new HashMap(); - } - - public boolean equals(PositionalRefSeqFeature that) { - return this.name2.equals(that.name2) && this.positiveStrand == that.positiveStrand && this.codonCoord == that.codonCoord && this.codingFrame == that.codingFrame - && this.referenceCodon.equals(that.referenceCodon) && this.referenceAA.equals(that.referenceAA); - } - - public void updateFeature(AnnotatorInputTableFeature refSeqAnnotation) { - if (!refSeqAnnotation.containsColumnName(AnnotateMNPsWalker.REFSEQ_ALT_BASE)) - throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + AnnotateMNPsWalker.REFSEQ_ALT_BASE); - String base = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_ALT_BASE); - - baseToAnnotations.put(base, new BaseAnnotations(refSeqAnnotation)); - } - - public String toString() { // INTERNAL use only - StringBuilder sb = new StringBuilder(); - - sb.append("name2= ").append(name2); - sb.append(", positiveStrand= ").append(positiveStrand); - sb.append(", codonCoord= ").append(codonCoord); - sb.append(", codingFrame= ").append(codingFrame); - sb.append(", referenceCodon= ").append(referenceCodon); - sb.append(", referenceAA= ").append(referenceAA); - - sb.append(", baseAnnotations= {"); - for (Map.Entry baseToAnnotationsEntry : baseToAnnotations.entrySet()) { - String base = baseToAnnotationsEntry.getKey(); - BaseAnnotations annotations = baseToAnnotationsEntry.getValue(); - sb.append(" ").append(base).append(" -> {").append(annotations).append("}"); - } - sb.append(" }"); - - return sb.toString(); - } -} - -class BaseAnnotations { - private final static String[] REQUIRE_COLUMNS = - {AnnotateMNPsWalker.REFSEQ_VARIANT_CODON, AnnotateMNPsWalker.REFSEQ_VARIANT_AA, - AnnotateMNPsWalker.REFSEQ_CHANGES_AA, AnnotateMNPsWalker.REFSEQ_FUNCTIONAL_CLASS, - AnnotateMNPsWalker.REFSEQ_PROTEIN_COORD_DESCRIPTION}; - - protected String variantCodon; - protected String variantAA; - protected boolean changesAA; - protected String functionalClass; - protected String proteinCoordStr; - - public BaseAnnotations(AnnotatorInputTableFeature refSeqAnnotation) { - for (String column : REQUIRE_COLUMNS) { - if (!refSeqAnnotation.containsColumnName(column)) - throw new UserException("In RefSeq: " + refSeqAnnotation + " Missing column " + column); - } - this.variantCodon = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_VARIANT_CODON); - this.variantAA = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_VARIANT_AA); - this.changesAA = Boolean.parseBoolean(refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_CHANGES_AA)); - this.functionalClass = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_FUNCTIONAL_CLASS); - this.proteinCoordStr = refSeqAnnotation.getColumnValue(AnnotateMNPsWalker.REFSEQ_PROTEIN_COORD_DESCRIPTION); - } - - - public String toString() { // INTERNAL use only - StringBuilder sb = new StringBuilder(); - - sb.append("variantCodon= ").append(variantCodon); - sb.append(", variantAA= ").append(variantAA); - sb.append(", changesAA= ").append(changesAA); - sb.append(", functionalClass= ").append(functionalClass); - sb.append(", proteinCoordStr= ").append(proteinCoordStr); - - return sb.toString(); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcid.java b/public/java/src/org/broadinstitute/sting/utils/AminoAcid.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcid.java rename to public/java/src/org/broadinstitute/sting/utils/AminoAcid.java index 0d0b906e0..0b47093fa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcid.java +++ b/public/java/src/org/broadinstitute/sting/utils/AminoAcid.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +package org.broadinstitute.sting.utils; /** * Represents a single amino acid. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcidTable.java b/public/java/src/org/broadinstitute/sting/utils/AminoAcidTable.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcidTable.java rename to public/java/src/org/broadinstitute/sting/utils/AminoAcidTable.java index c10eb5dd7..1ae28ffb3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/AminoAcidTable.java +++ b/public/java/src/org/broadinstitute/sting/utils/AminoAcidTable.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; +package org.broadinstitute.sting.utils; import java.util.HashMap; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java deleted file mode 100755 index c75a5b2dc..000000000 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotatorIntegrationTest.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.annotator.genomicannotator; - - -import java.util.Arrays; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -public class GenomicAnnotatorIntegrationTest extends WalkerTest { - String testFileWithIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.cleaned.indels.vcf"; - String testFileWithSNPsAndIndels = validationDataLocation + "/GenomicAnnotatorValidation/1KGBroadWEx.variants.vcf"; - - @Test - public void testGenomicAnnotatorOnDbSNP() { - - /* - TODO put this test back in once it gets faster. - String[] md5 = {"d19d6d1eb52fb09e7493653dc645d92a"}; - WalkerTestSpec spec = new WalkerTestSpec( - "-T GenomicAnnotator -R " + b36KGReference + " " + - "-B:variant,vcf /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf " + - "-B:dbsnp,AnnotatorInputTable /humgen/gsa-hpprojects/GATK/data/Annotations/dbsnp/b130/snp130-b36-only-the-SNPs.txt " + - "-m " + //generate many records from one input record if necessary - "-o %s " + - "-BTI variant", - 1, - Arrays.asList(md5)); - executeTest("test with dbSNP", spec); - */ - - - String[] md5WithDashSArg = {"efba4ce1641cfa2ef88a64395f2ebce8"}; - WalkerTestSpec specWithSArg = new WalkerTestSpec( - "-T GenomicAnnotator -R " + b36KGReference + - " -B:variant,vcf3 /humgen/gsa-hpprojects/GATK/data/Annotations/examples/CEU_hapmap_nogt_23_subset.vcf" + - " -B:dbsnp,AnnotatorInputTable /humgen/gsa-hpprojects/GATK/data/Annotations/dbsnp/b130/snp130-b36-only-the-SNPs.txt" + - " -m" + //generate many records from one input record if necessary - " -o %s" + - " -BTI variant" + - " -s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet" + - " -NO_HEADER", - 1, - Arrays.asList(md5WithDashSArg)); - executeTest("test with dbSNP and -s arg", specWithSArg); - - } - - @Test - public void testGenomicAnnotatorOnIndels() { - WalkerTestSpec testOnIndels = new WalkerTestSpec( - buildCommandLine( - "-T GenomicAnnotator", - "-R " + b37KGReference, - "-L 22:10000000-20000000", - "-B:refseq,AnnotatorInputTable " + b37Refseq, - "-B:variant,VCF " + testFileWithIndels, - "-NO_HEADER", - "-o %s" - ), - 1, - Arrays.asList("772fc3f43b70770ec6c6acbb8bbbd4c0") - ); - executeTest("testGenomicAnnotatorOnIndels", testOnIndels); - } - - @Test - public void testGenomicAnnotatorOnSNPsAndIndels() { - WalkerTestSpec testOnSNPsAndIndels = new WalkerTestSpec( - buildCommandLine( - "-T GenomicAnnotator", - "-R " + b37KGReference, - "-L 22:10000000-20000000", - "-B:refseq,AnnotatorInputTable " + b37Refseq, - "-B:variant,VCF " + testFileWithSNPsAndIndels, - "-NO_HEADER", - "-o %s" - ), - 1, - Arrays.asList("081ade7f3d2d3c5f19cb1e8651a626f3") - ); - executeTest("testGenomicAnnotatorOnSNPsAndIndels", testOnSNPsAndIndels); - } -} From 6f43284053cbae9e263ac43056b6c4445fc19017 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Mon, 25 Jul 2011 15:19:41 -0400 Subject: [PATCH 020/186] Switch to including our entire exception tree rather than just UserException. --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 295cd95e1..a9348eae3 100644 --- a/build.xml +++ b/build.xml @@ -489,7 +489,7 @@ - + From 38969b97836e593a8879e4790940dd48a13e64b4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 26 Jul 2011 11:09:06 -0400 Subject: [PATCH 030/186] Prototype of RODBinding @Arguments instead of -B syntax Initial version of RodBinding class. Flow from walker Rodbinding @Arguments -> RMDTriplet (old system) -> GATK engine (standard). Will need refactoring. --- .../commandline/ArgumentTypeDescriptor.java | 38 ++- .../sting/commandline/ParsingEngine.java | 1 + .../sting/commandline/RodBinding.java | 63 +++++ .../sting/gatk/CommandLineExecutable.java | 25 +- .../VariantsToTableNewRodStyle.java | 225 ++++++++++++++++++ .../sting/utils/text/ListFileUtils.java | 39 +++ 6 files changed, 386 insertions(+), 5 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/RodBinding.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 9c33e084d..eaabe4da2 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import java.io.File; import java.lang.annotation.Annotation; import java.lang.reflect.*; import java.util.*; @@ -275,6 +276,35 @@ public abstract class ArgumentTypeDescriptor { } } +/** + * Parser for RodBinding objects + */ +class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { + /** + * We only want RodBinding class objects + * @param type The type to check. + * @return true if the provided class is a RodBinding.class + */ + @Override + public boolean supports( Class type ) { + return isRodBinding(type); + } + + public static boolean isRodBinding( Class type ) { + return type.isAssignableFrom(RodBinding.class); + } + + @Override + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { + ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); + String value = getArgumentValue( defaultDefinition, matches ); + RodBinding result = new RodBinding(source.field.getName(), new File(value)); + Tags tags = getArgumentTags(matches); + parsingEngine.addTags(result,tags); + return result; + } +} + /** * Parse simple argument types: java primitives, wrapper classes, and anything that has * a simple String constructor. @@ -282,9 +312,10 @@ public abstract class ArgumentTypeDescriptor { class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public boolean supports( Class type ) { - if( type.isPrimitive() ) return true; - if( type.isEnum() ) return true; - if( primitiveToWrapperMap.containsValue(type) ) return true; + if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false; + if ( type.isPrimitive() ) return true; + if ( type.isEnum() ) return true; + if ( primitiveToWrapperMap.containsValue(type) ) return true; try { type.getConstructor(String.class); @@ -385,7 +416,6 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) { Class componentType; Object result; - Tags tags; if( Collection.class.isAssignableFrom(type) ) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 0dc18e6f9..e2e694cfb 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -64,6 +64,7 @@ public class ParsingEngine { * The type of set used must be ordered (but not necessarily sorted). */ private static final Set STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet( Arrays.asList(new SimpleArgumentTypeDescriptor(), + new RodBindingArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(), new MultiplexArgumentTypeDescriptor()) ); diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java new file mode 100644 index 000000000..2f5046b3a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.io.File; +import java.util.List; + +/** + * + */ +// TODO -- should have a derived class called VariantContentRodBinding with simple accessors +public class RodBinding { + final String variableName; + final File sourceFile; + + public RodBinding(final String variableName, final File sourceFile) { + this.variableName = variableName; + this.sourceFile = sourceFile; + } + + public String getVariableName() { + return variableName; + } + + public File getSourceFile() { + return sourceFile; + } + + public List getAll(RefMetaDataTracker tracker) { + return (List)tracker.getReferenceMetaData(variableName); + } + + public T getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { + return (T)tracker.getVariantContext(ref, variableName, loc); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index a080ab439..10573cf25 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,8 +25,10 @@ package org.broadinstitute.sting.gatk; +import org.broadinstitute.sting.commandline.ArgumentSource; import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.broadinstitute.sting.commandline.CommandLineProgram; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; @@ -34,12 +36,15 @@ import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescripto import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.text.ListFileUtils; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.List; /** * @author aaron @@ -81,7 +86,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { // File lists can require a bit of additional expansion. Set these explicitly by the engine. engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser)); - engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser)); engine.setWalker(walker); walker.setToolkit(engine); @@ -96,6 +100,11 @@ public abstract class CommandLineExecutable extends CommandLineProgram { loadArgumentsIntoObject(walker); argumentSources.add(walker); + Collection newStyle = ListFileUtils.unpackRODBindings(getRodBindingsInWalker(walker), parser); + Collection oldStyle = ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings, getArgumentCollection().DBSNPFile, parser); + oldStyle.addAll(newStyle); + engine.setReferenceMetaDataFiles(oldStyle); + for (ReadFilter filter: filters) { loadArgumentsIntoObject(filter); argumentSources.add(filter); @@ -112,6 +121,20 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } + private List getRodBindingsInWalker(Walker walker) { + List rods = new ArrayList(); + + for ( ArgumentSource source : parser.extractArgumentSources(walker.getClass()) ) { + Object obj = JVMUtils.getFieldValue(source.field, walker); + if ( obj instanceof RodBinding ) { + System.out.printf("Found rod binding for field %s of %s%n", obj, source.field); + rods.add((RodBinding)obj); + } + } + + return rods; + } + /** * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. * This report will be written to either STDOUT or to the run repository, depending on the options diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java new file mode 100644 index 000000000..b44983e3d --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.io.PrintStream; +import java.util.*; + +/** + * Emits specific fields as dictated by the user from one or more VCF files. + */ +public class VariantsToTableNewRodStyle extends RodWalker { + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + @Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true) + public ArrayList fieldsToTake = new ArrayList(); + + @Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records") + public boolean showFiltered = false; + + @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false) + public int MAX_RECORDS = -1; + int nRecords = 0; + + @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) + public boolean keepMultiAllelic = false; + + @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false) + public boolean ALLOW_MISSING_DATA = false; + + @Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true) + public RodBinding variants; + + public void initialize() { + out.println(Utils.join("\t", fieldsToTake)); + } + + public static abstract class Getter { public abstract String get(VariantContext vc); } + public static Map getters = new HashMap(); + + static { + // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT + getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); + getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); + getters.put("REF", new Getter() { + public String get(VariantContext vc) { + String x = ""; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x=x+new String(new byte[]{refByte}); + } + return x+vc.getReference().getDisplayString(); + } + }); + getters.put("ALT", new Getter() { + public String get(VariantContext vc) { + StringBuilder x = new StringBuilder(); + int n = vc.getAlternateAlleles().size(); + if ( n == 0 ) return "."; + if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { + Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + x.append(new String(new byte[]{refByte})); + } + + for ( int i = 0; i < n; i++ ) { + if ( i != 0 ) x.append(","); + x.append(vc.getAlternateAllele(i).getDisplayString()); + } + return x.toString(); + } + }); + getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } }); + getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) { + if ( vc.isSNP() && vc.isBiallelic() ) + return VariantContextUtils.isTransition(vc) ? "1" : "0"; + else + return "-1"; + }}); + getters.put("FILTER", new Getter() { public String get(VariantContext vc) { + return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } + }); + + getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } }); + getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } }); + getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } }); + getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } }); + getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } }); + getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } }); + getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } }); + getters.put("GQ", new Getter() { public String get(VariantContext vc) { + if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF"); + return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError()); + }}); + } + + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) // RodWalkers can make funky map calls + return 0; + + if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { + VariantContext vc = variants.getVariantContext(tracker, ref, context.getLocation()); + if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { + List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); + out.println(Utils.join("\t", vals)); + } + return 1; + } else { + if ( nRecords >= MAX_RECORDS ) { + logger.warn("Calling sys exit to leave after " + nRecords + " records"); + System.exit(0); // todo -- what's the recommend way to abort like this? + } + return 0; + } + } + + private static final boolean isWildCard(String s) { + return s.endsWith("*"); + } + + public static List extractFields(VariantContext vc, List fields, boolean allowMissingData) { + List vals = new ArrayList(); + + for ( String field : fields ) { + String val = "NA"; + + if ( getters.containsKey(field) ) { + val = getters.get(field).get(vc); + } else if ( vc.hasAttribute(field) ) { + val = vc.getAttributeAsString(field); + } else if ( isWildCard(field) ) { + Set wildVals = new HashSet(); + for ( Map.Entry elt : vc.getAttributes().entrySet()) { + if ( elt.getKey().startsWith(field.substring(0, field.length() - 1)) ) { + wildVals.add(elt.getValue().toString()); + } + } + + if ( wildVals.size() > 0 ) { + List toVal = new ArrayList(wildVals); + Collections.sort(toVal); + val = Utils.join(",", toVal); + } + } else if ( ! allowMissingData ) { + throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); + } + + if (field.equals("AF") || field.equals("AC")) { + String afo = val; + + double af=0; + if (afo.contains(",")) { + String[] afs = afo.split(","); + afs[0] = afs[0].substring(1,afs[0].length()); + afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); + + double[] afd = new double[afs.length]; + + for (int k=0; k < afd.length; k++) + afd[k] = Double.valueOf(afs[k]); + + af = MathUtils.arrayMax(afd); + //af = Double.valueOf(afs[0]); + + } + else + if (!afo.equals("NA")) + af = Double.valueOf(afo); + + val = Double.toString(af); + + } + vals.add(val); + } + + return vals; + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + public void onTraversalDone(Integer sum) {} +} diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 4ab1c1685..d9bf86aba 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.text; import org.broadinstitute.sting.commandline.ParsingEngine; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; @@ -93,6 +94,7 @@ public class ListFileUtils { * @return a list of expanded, bound RODs. */ public static Collection unpackRODBindings(final List RODBindings, final String dbSNPFile, final ParsingEngine parser) { + // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); for (String fileName: RODBindings) { @@ -134,6 +136,43 @@ public class ListFileUtils { return rodBindings; } + /** + * Convert command-line argument representation of ROD bindings to something more easily understandable by the engine. + * @param RODBindings a text equivale + * @return a list of expanded, bound RODs. + */ + public static Collection unpackRODBindings(final List RODBindings, final ParsingEngine parser) { + // todo -- this is a strange home for this code. Move into ROD system + Collection rodBindings = new ArrayList(); + + for (RodBinding rodBinding: RODBindings) { + String argValue = rodBinding.getSourceFile().getPath(); + String fileName = expandFileName(argValue); + final Tags tags = parser.getTags(rodBinding); + + List positionalTags = tags.getPositionalTags(); + if(positionalTags.size() != 1) + throw new UserException("Invalid syntax for RODBinding (reference-ordered data) input . " + + "Please use the following syntax when providing reference-ordered " + + "data: -: ."); + // Assume that if tags are present, those tags are name and type. + // Name is always first, followed by type. + String name = rodBinding.getVariableName(); + String type = positionalTags.get(0); + + RMDTriplet.RMDStorageType storageType = null; + if(tags.getValue("storage") != null) + storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,tags.getValue("storage")); + else if(fileName.toLowerCase().endsWith("stdin")) + storageType = RMDTriplet.RMDStorageType.STREAM; + else + storageType = RMDTriplet.RMDStorageType.FILE; + + rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags)); + } + + return rodBindings; + } /** * Expand any special characters that appear in the filename. Right now, '-' is expanded to From 7ab8b5333930435a53716078956c6ff209633475 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 26 Jul 2011 11:37:31 -0400 Subject: [PATCH 031/186] Support for List argument type --- .../sting/commandline/RodBinding.java | 15 ++++++++++----- .../variantutils/VariantsToTableNewRodStyle.java | 8 +++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index 2f5046b3a..f4086fead 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.util.List; @@ -35,7 +36,7 @@ import java.util.List; * */ // TODO -- should have a derived class called VariantContentRodBinding with simple accessors -public class RodBinding { +public class RodBinding { final String variableName; final File sourceFile; @@ -52,12 +53,16 @@ public class RodBinding { return sourceFile; } - public List getAll(RefMetaDataTracker tracker) { - return (List)tracker.getReferenceMetaData(variableName); + public List getAll(RefMetaDataTracker tracker) { + return tracker.getReferenceMetaData(variableName); } - public T getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { - return (T)tracker.getVariantContext(ref, variableName, loc); + public VariantContext getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { + return tracker.getVariantContext(ref, variableName, loc); + } + + public String toString() { + return String.format("(RodBinding name=%s source=%s)", variableName, sourceFile); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java index b44983e3d..552c5bd07 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -66,7 +66,10 @@ public class VariantsToTableNewRodStyle extends RodWalker { public boolean ALLOW_MISSING_DATA = false; @Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true) - public RodBinding variants; + public RodBinding variants; + + @Input(fullName="variantsList", shortName="VL", doc="The variant file we will convert to a table", required=true) + public List variantsList; public void initialize() { out.println(Utils.join("\t", fieldsToTake)); @@ -135,6 +138,9 @@ public class VariantsToTableNewRodStyle extends RodWalker { if ( tracker == null ) // RodWalkers can make funky map calls return 0; + for ( RodBinding binding : variantsList ) + System.out.printf("VariantList binding %s%n", binding); + if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { VariantContext vc = variants.getVariantContext(tracker, ref, context.getLocation()); if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { From d0badd5bd6f337d0b2c56159a140ae449827c9db Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 26 Jul 2011 13:54:55 -0400 Subject: [PATCH 032/186] RodBinding subclassed to VariantContextRodBinding for easy access to VariantContext providing RODs --- .../commandline/ArgumentTypeDescriptor.java | 21 +++++++-- .../sting/commandline/RodBinding.java | 15 ++---- .../commandline/VariantContextRodBinding.java | 46 +++++++++++++++++++ .../VariantsToTableNewRodStyle.java | 12 ++--- 4 files changed, 69 insertions(+), 25 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index eaabe4da2..16e02c5bd 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -291,17 +291,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } public static boolean isRodBinding( Class type ) { - return type.isAssignableFrom(RodBinding.class); + return RodBinding.class.isAssignableFrom(type); } @Override public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); - RodBinding result = new RodBinding(source.field.getName(), new File(value)); - Tags tags = getArgumentTags(matches); - parsingEngine.addTags(result,tags); - return result; + try { + Constructor ctor = type.getConstructor(String.class, String.class); + RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value); + Tags tags = getArgumentTags(matches); + parsingEngine.addTags(result,tags); + return result; + } catch (InvocationTargetException e) { + throw new UserException.CommandLineException( + String.format("Failed to parse value %s for argument %s.", + value, source.field.getName())); + } catch (Exception e) { + throw new UserException.CommandLineException( + String.format("Failed to parse value %s for argument %s.", + value, source.field.getName())); + } } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index f4086fead..c5b33a17e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -24,23 +24,18 @@ package org.broadinstitute.sting.commandline; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.File; import java.util.List; /** * */ -// TODO -- should have a derived class called VariantContentRodBinding with simple accessors public class RodBinding { final String variableName; - final File sourceFile; + final String sourceFile; - public RodBinding(final String variableName, final File sourceFile) { + public RodBinding(final String variableName, final String sourceFile) { this.variableName = variableName; this.sourceFile = sourceFile; } @@ -49,7 +44,7 @@ public class RodBinding { return variableName; } - public File getSourceFile() { + public String getSourceFile() { return sourceFile; } @@ -57,10 +52,6 @@ public class RodBinding { return tracker.getReferenceMetaData(variableName); } - public VariantContext getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { - return tracker.getVariantContext(ref, variableName, loc); - } - public String toString() { return String.format("(RodBinding name=%s source=%s)", variableName, sourceFile); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java new file mode 100644 index 000000000..66a428369 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.File; +import java.util.List; + +/** + * + */ +public class VariantContextRodBinding extends RodBinding { + public VariantContextRodBinding(final String variableName, final String sourceFile) { + super(variableName, sourceFile); + } + + public VariantContext getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { + return tracker.getVariantContext(ref, variableName, loc); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java index 552c5bd07..1b913e895 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -24,14 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; @@ -66,9 +62,9 @@ public class VariantsToTableNewRodStyle extends RodWalker { public boolean ALLOW_MISSING_DATA = false; @Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true) - public RodBinding variants; + public VariantContextRodBinding variants; - @Input(fullName="variantsList", shortName="VL", doc="The variant file we will convert to a table", required=true) + @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) public List variantsList; public void initialize() { @@ -139,7 +135,7 @@ public class VariantsToTableNewRodStyle extends RodWalker { return 0; for ( RodBinding binding : variantsList ) - System.out.printf("VariantList binding %s%n", binding); + System.out.printf("VariantList binding %s tags=%s%n", binding, getToolkit().getTags(binding).getPositionalTags()); if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { VariantContext vc = variants.getVariantContext(tracker, ref, context.getLocation()); From 9dfb57168a5c43332e94c0f8af850d5c0c0bc833 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 26 Jul 2011 13:59:44 -0400 Subject: [PATCH 033/186] RodBinding source is no longer assumed to be a file --- .../broadinstitute/sting/commandline/RodBinding.java | 12 ++++++------ .../sting/utils/text/ListFileUtils.java | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index c5b33a17e..028d2f411 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -33,19 +33,19 @@ import java.util.List; */ public class RodBinding { final String variableName; - final String sourceFile; + final String source; - public RodBinding(final String variableName, final String sourceFile) { + public RodBinding(final String variableName, final String source) { this.variableName = variableName; - this.sourceFile = sourceFile; + this.source = source; } public String getVariableName() { return variableName; } - public String getSourceFile() { - return sourceFile; + public String getSource() { + return source; } public List getAll(RefMetaDataTracker tracker) { @@ -53,7 +53,7 @@ public class RodBinding { } public String toString() { - return String.format("(RodBinding name=%s source=%s)", variableName, sourceFile); + return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index d9bf86aba..7d4e47a94 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -146,7 +146,7 @@ public class ListFileUtils { Collection rodBindings = new ArrayList(); for (RodBinding rodBinding: RODBindings) { - String argValue = rodBinding.getSourceFile().getPath(); + String argValue = rodBinding.getSource(); String fileName = expandFileName(argValue); final Tags tags = parser.getTags(rodBinding); From fec495e2928a2dc13e5957ab1f3131b151b18a08 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Tue, 26 Jul 2011 15:43:20 -0400 Subject: [PATCH 034/186] Fix a nasty little bug in the sharding system: if the last shard in contig n overlaps exactly on disk with the first shard in contig n+1, the shards would be merged together to avoid duplicate extraction. Unfortunately, the interval overlap filter couldn't handle shards spanning contigs, and was choosing to filter out reads from contig n+1 which should have been included. I'm not completely sure why the BAM indexing code would ever specify that the end of one chromosome had the same on-disk location as the start of the next one. I suspect that this is a indexer performance bug. --- .../gatk/datasources/reads/SAMDataSource.java | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 6064806f3..572970349 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -893,6 +893,7 @@ public class SAMDataSource { * Custom representation of interval bounds. * Makes it simpler to track current position. */ + private int[] intervalContigIndices; private int[] intervalStarts; private int[] intervalEnds; @@ -917,12 +918,14 @@ public class SAMDataSource { if(foundMappedIntervals) { if(keepOnlyUnmappedReads) throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads"); + this.intervalContigIndices = new int[intervals.size()]; this.intervalStarts = new int[intervals.size()]; this.intervalEnds = new int[intervals.size()]; int i = 0; for(GenomeLoc interval: intervals) { - intervalStarts[i] = (int)interval.getStart(); - intervalEnds[i] = (int)interval.getStop(); + intervalContigIndices[i] = interval.getContigIndex(); + intervalStarts[i] = interval.getStart(); + intervalEnds[i] = interval.getStop(); i++; } } @@ -961,11 +964,10 @@ public class SAMDataSource { while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) { if(!keepOnlyUnmappedReads) { // Mapped read filter; check against GenomeLoc-derived bounds. - if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] || - (candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) { - // This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval. + if(readEndsOnOrAfterStartingBound(candidateRead)) { + // This read ends after the current interval begins. // Promising, but this read must be checked against the ending bound. - if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) { + if(readStartsOnOrBeforeEndingBound(candidateRead)) { // Yes, this read is within both bounds. This must be our next read. nextRead = candidateRead; break; @@ -993,6 +995,37 @@ public class SAMDataSource { candidateRead = iterator.next(); } } + + /** + * Check whether the read lies after the start of the current bound. If the read is unmapped but placed, its + * end will be distorted, so rely only on the alignment start. + * @param read The read to position-check. + * @return True if the read starts after the current bounds. False otherwise. + */ + private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) { + return + // Read ends on a later contig, or... + read.getReferenceIndex() > intervalContigIndices[currentBound] || + // Read ends of this contig... + (read.getReferenceIndex() == intervalContigIndices[currentBound] && + // either after this location, or... + (read.getAlignmentEnd() >= intervalStarts[currentBound] || + // read is unmapped but positioned and alignment start is on or after this start point. + (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound]))); + } + + /** + * Check whether the read lies before the end of the current bound. + * @param read The read to position-check. + * @return True if the read starts after the current bounds. False otherwise. + */ + private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) { + return + // Read starts on a prior contig, or... + read.getReferenceIndex() < intervalContigIndices[currentBound] || + // Read starts on this contig and the alignment start is registered before this end point. + (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]); + } } /** From f6a5e0e36aedad122f456db3b0dadfdde4c18e12 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 26 Jul 2011 17:35:30 -0400 Subject: [PATCH 036/186] Go for global integrationtest path first, if possible. --- public/java/test/org/broadinstitute/sting/MD5DB.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/MD5DB.java b/public/java/test/org/broadinstitute/sting/MD5DB.java index 6f56fce4b..0194e114a 100644 --- a/public/java/test/org/broadinstitute/sting/MD5DB.java +++ b/public/java/test/org/broadinstitute/sting/MD5DB.java @@ -79,8 +79,8 @@ public class MD5DB { * @return */ public static String getMD5FilePath(final String md5, final String valueIfNotFound) { - // we prefer the local db to the global DB, so match it first - for ( String dir : Arrays.asList(LOCAL_MD5_DB_DIR, GLOBAL_MD5_DB_DIR)) { + // we prefer the global db to the local DB, so match it first + for ( String dir : Arrays.asList(GLOBAL_MD5_DB_DIR, LOCAL_MD5_DB_DIR)) { File f = getFileForMD5(md5, dir); if ( f.exists() && f.canRead() ) return f.getPath(); From 412c466de6cee301082086cb0e99dc193029faff Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Tue, 26 Jul 2011 17:43:43 -0400 Subject: [PATCH 037/186] Bug fix, wherein triple-hets after genotype refinement need to be left unphased, not just prior to refinement --- .../gatk/walkers/phasing/PhaseByTransmission.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index d3ed46ce8..cf4afbb6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -234,7 +234,7 @@ public class PhaseByTransmission extends RodWalker { finalGenotypes.add(father); finalGenotypes.add(child); - if (mother.isCalled() && father.isCalled() && child.isCalled() && !(mother.isHet() && father.isHet() && child.isHet())) { + if (mother.isCalled() && father.isCalled() && child.isCalled()) { ArrayList possibleMotherGenotypes = createAllThreeGenotypes(ref, alt, mother); ArrayList possibleFatherGenotypes = createAllThreeGenotypes(ref, alt, father); ArrayList possibleChildGenotypes = createAllThreeGenotypes(ref, alt, child); @@ -265,12 +265,14 @@ public class PhaseByTransmission extends RodWalker { } } - Map attributes = new HashMap(); - attributes.putAll(bestChildGenotype.getAttributes()); - attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm); - bestChildGenotype = Genotype.modifyAttributes(bestChildGenotype, attributes); + if (!(bestMotherGenotype.isHet() && bestFatherGenotype.isHet() && bestChildGenotype.isHet())) { + Map attributes = new HashMap(); + attributes.putAll(bestChildGenotype.getAttributes()); + attributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, bestPrior*bestConfigurationLikelihood / norm); + bestChildGenotype = Genotype.modifyAttributes(bestChildGenotype, attributes); - finalGenotypes = getPhasedGenotypes(bestMotherGenotype, bestFatherGenotype, bestChildGenotype); + finalGenotypes = getPhasedGenotypes(bestMotherGenotype, bestFatherGenotype, bestChildGenotype); + } } return finalGenotypes; From 92a11ed8dcc6899697196e329ea2b3d944fb3bf2 Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Tue, 26 Jul 2011 17:52:25 -0400 Subject: [PATCH 038/186] Updated MD5 for PhaseByTransmissionIntegrationTest --- .../walkers/phasing/PhaseByTransmissionIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 69f98b700..f62f12082 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -20,7 +20,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("ff02b1583ee3a12ed66a9c0e08e346b2") + Arrays.asList("45fef0e23113e2fcd9570379e2fc1b75") ); executeTest("testBasicFunctionality", spec); } From 321afac4e8ebc3a6db2c0e9b00094f0dfe12f0e6 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 26 Jul 2011 19:29:25 -0400 Subject: [PATCH 039/186] Updates to the help layout. *New style.css, new template for the walker auto-generated html. Short description is no longer repeated in the long description of the walker. *Updated DiffObjectsWalker and ContigStatsWalker as "reference" documented walkers. --- .../walkers/diffengine/DiffObjectsWalker.java | 74 +++++++++++-------- .../help/GenericDocumentationHandler.java | 2 +- settings/helpTemplates/common.html | 6 +- settings/helpTemplates/generic.template.html | 7 +- settings/helpTemplates/style.css | 52 +++++++++++-- 5 files changed, 96 insertions(+), 45 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index b679f967a..f43d1342d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.diffengine; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -35,34 +34,45 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import java.io.File; import java.io.PrintStream; -import java.util.Arrays; import java.util.List; /** * A generic engine for comparing tree-structured objects + * *

- * Compares two record-oriented files, itemizing specific difference between equivalent - * records in the two files. Reports both itemized and summarized differences. - *

- * What are the summarized differences and the DiffObjectsWalker + * Compares two record-oriented files, itemizing specific difference between equivalent + * records in the two files. Reports both itemized and summarized differences. + *

+ * + *

What are the summarized differences and the DiffObjectsWalker?

+ * *

* The GATK contains a summarizing difference engine that compares hierarchical data structures to emit: - *

    - *
  • A list of specific differences between the two data structures. This is similar to saying the value in field A in record 1 in file F differences from the value in field A in record 1 in file G. - *
  • A summarized list of differences ordered by frequency of the difference. This output is similar to saying field A in 50 records in files F and G differed. - *
+ *
    + *
  • A list of specific differences between the two data structures. This is similar to saying the value in field A in record 1 in file F differences from the value in field A in record 1 in file G. + *
  • A summarized list of differences ordered by frequency of the difference. This output is similar to saying field A in 50 records in files F and G differed. + *
+ *

* *

- * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + * The GATK contains a private walker DiffObjects that allows you access to the DiffEngine capabilities on the command line. Simply provide the walker with the master and test files and it will emit summarized differences for you. + *

+ * + *

Why?

* *

- * Why? - *

- * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + * The reason for this system is that it allows you to compare two structured files -- such as BAMs and VCFs -- for common differences among them. This is primarily useful in regression testing or optimization, where you want to ensure that the differences are those that you expect and not any others. + *

* - *

Understanding the output - *

The DiffEngine system compares to two hierarchical data structures for specific differences in the values of named - * nodes. Suppose I have two trees: + *

Input

+ *

+ * The DiffObjectsWalker works with BAM or VCF files. + *

+ * + *

Output

+ *

+ * The DiffEngine system compares to two hierarchical data structures for specific differences in the values of named + * nodes. Suppose I have two trees: *

  *     Tree1=(A=1 B=(C=2 D=3))
  *     Tree2=(A=1 B=(C=3 D=3 E=4))
@@ -70,33 +80,37 @@ import java.util.List;
  * 
*

* where every node in the tree is named, or is a raw value (here all leaf values are integers). The DiffEngine - * traverses these data structures by name, identifies equivalent nodes by fully qualified names - * (Tree1.A is distinct from Tree2.A, and determines where their values are equal (Tree1.A=1, Tree2.A=1, so they are). - * These itemized differences are listed as: + * traverses these data structures by name, identifies equivalent nodes by fully qualified names + * (Tree1.A is distinct from Tree2.A, and determines where their values are equal (Tree1.A=1, Tree2.A=1, so they are). + * These itemized differences are listed as: *

  *     Tree1.B.C=2 != Tree2.B.C=3
  *     Tree1.B.C=2 != Tree3.B.C=4
  *     Tree2.B.C=3 != Tree3.B.C=4
  *     Tree1.B.E=MISSING != Tree2.B.E=4
  * 
+ * *

- * This conceptually very similar to the output of the unix command line tool diff. What's nice about DiffEngine though - * is that it computes similarity among the itemized differences and displays the count of differences names - * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs - * only once. So the summary is: + * This conceptually very similar to the output of the unix command line tool diff. What's nice about DiffEngine though + * is that it computes similarity among the itemized differences and displays the count of differences names + * in the system. In the above example, the field C is not equal three times, while the missing E in Tree1 occurs + * only once. So the summary is: * *

  *     *.B.C : 3
  *     *.B.E : 1
  * 
- *

where the * operator indicates that any named field matches. This output is sorted by counts, and provides an - * immediate picture of the commonly occurring differences among the files. + * *

- * Below is a detailed example of two VCF fields that differ because of a bug in the AC, AF, and AN counting routines, - * detected by the integrationtest integration (more below). You can see that in the although there are many specific - * instances of these differences between the two files, the summarized differences provide an immediate picture that - * the AC, AF, and AN fields are the major causes of the differences. + * where the * operator indicates that any named field matches. This output is sorted by counts, and provides an + * immediate picture of the commonly occurring differences among the files. *

+ * Below is a detailed example of two VCF fields that differ because of a bug in the AC, AF, and AN counting routines, + * detected by the integrationtest integration (more below). You can see that in the although there are many specific + * instances of these differences between the two files, the summarized differences provide an immediate picture that + * the AC, AF, and AN fields are the major causes of the differences. + *

+ * *

    [testng] path                                                             count
    [testng] *.*.*.AC                                                         6
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
index fd1048844..c69345816 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java
@@ -92,7 +92,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
         for(Tag tag: classdoc.firstSentenceTags())
             summaryBuilder.append(tag.text());
         root.put("summary", summaryBuilder.toString());
-        root.put("description", classdoc.commentText());
+        root.put("description", classdoc.commentText().substring(summaryBuilder.toString().length()));
         root.put("timestamp", toProcess.buildTimestamp);
         root.put("version", toProcess.absoluteVersion);
 
diff --git a/settings/helpTemplates/common.html b/settings/helpTemplates/common.html
index ebc060d0a..1554a1d40 100644
--- a/settings/helpTemplates/common.html
+++ b/settings/helpTemplates/common.html
@@ -6,10 +6,10 @@
 
 
 <#macro headerInfo>
+
+
+<#macro footerInfo>
     

See also Main index | GATK wiki | GATK support forum

GATK version ${version} built at ${timestamp}.

-<#macro footerInfo> - - diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index ca0d1e76f..032407164 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -53,19 +53,18 @@

${name}

<@headerInfo /> -

Brief summary

- ${summary} +

${summary}

<#if author??>

Author

${author} -

Detailed description

+

Introduction

${description} <#-- Create the argument summary --> <#if arguments.all?size != 0>
-

Feature specific arguments

+

${name} specific arguments

diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css index 79f409f55..1d7bcc576 100644 --- a/settings/helpTemplates/style.css +++ b/settings/helpTemplates/style.css @@ -14,29 +14,67 @@ p, ul, ol, dl, dt, dd, td font-size: 12pt; } -p.version, p.see-also +p { - font-size: 8pt; + margin-left: 1em; } -h1, h2, h3 +p.summary +{ + margin-left: 2em; + margin-top: -20pt; + font-style: italic; +} + +p.see-also +{ + font-size: 10pt; + margin-left: 0em; + margin-top: 3em; + text-align: center; +} + +p.version +{ + font-size: 8pt; + margin-left: 0em; + margin-top: -8pt; + text-align: center; +} + + +h1, h2, h3, h4 { font-family: Corbel, Arial, Helvetica, Sans-Serif; font-weight: bold; text-align: left; - color: #669; } h1 { font-size: 32pt; letter-spacing: -2px; + color: #669; } -h3 +h2 { - font-size: 16pt; - font-weight: normal; + font-size: 16pt; + font-weight: bold; + margin-top: 2em; + color: #669; +} + +h3 +{ + font-size: 12pt; + margin-left: 1em; + color: #000; +} + +hr +{ + margin-top: 4em; } /* From dc8061e7a69b81715ac3ba967e34dd23026a8e6d Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Wed, 27 Jul 2011 10:34:56 -0400 Subject: [PATCH 041/186] Moved gsalib from private/ to public/ --- build.xml | 2 +- public/R/src/gsalib/DESCRIPTION | 10 ++ public/R/src/gsalib/R/gsa.error.R | 12 ++ public/R/src/gsalib/R/gsa.getargs.R | 116 ++++++++++++++++++ public/R/src/gsalib/R/gsa.message.R | 3 + public/R/src/gsalib/R/gsa.plot.venn.R | 50 ++++++++ public/R/src/gsalib/R/gsa.read.eval.R | 83 +++++++++++++ public/R/src/gsalib/R/gsa.read.gatkreport.R | 64 ++++++++++ public/R/src/gsalib/R/gsa.read.squidmetrics.R | 28 +++++ public/R/src/gsalib/R/gsa.read.vcf.R | 23 ++++ public/R/src/gsalib/R/gsa.warn.R | 3 + public/R/src/gsalib/Read-and-delete-me | 9 ++ public/R/src/gsalib/data/tearsheetdrop.jpg | Bin 0 -> 50343 bytes public/R/src/gsalib/man/gsa.error.Rd | 49 ++++++++ public/R/src/gsalib/man/gsa.getargs.Rd | 57 +++++++++ public/R/src/gsalib/man/gsa.message.Rd | 44 +++++++ public/R/src/gsalib/man/gsa.plot.venn.Rd | 75 +++++++++++ public/R/src/gsalib/man/gsa.read.eval.Rd | 111 +++++++++++++++++ .../R/src/gsalib/man/gsa.read.gatkreport.Rd | 55 +++++++++ .../R/src/gsalib/man/gsa.read.squidmetrics.Rd | 48 ++++++++ public/R/src/gsalib/man/gsa.read.vcf.Rd | 53 ++++++++ public/R/src/gsalib/man/gsa.warn.Rd | 46 +++++++ public/R/src/gsalib/man/gsalib-package.Rd | 68 ++++++++++ 23 files changed, 1008 insertions(+), 1 deletion(-) create mode 100644 public/R/src/gsalib/DESCRIPTION create mode 100644 public/R/src/gsalib/R/gsa.error.R create mode 100644 public/R/src/gsalib/R/gsa.getargs.R create mode 100644 public/R/src/gsalib/R/gsa.message.R create mode 100644 public/R/src/gsalib/R/gsa.plot.venn.R create mode 100644 public/R/src/gsalib/R/gsa.read.eval.R create mode 100644 public/R/src/gsalib/R/gsa.read.gatkreport.R create mode 100644 public/R/src/gsalib/R/gsa.read.squidmetrics.R create mode 100644 public/R/src/gsalib/R/gsa.read.vcf.R create mode 100644 public/R/src/gsalib/R/gsa.warn.R create mode 100644 public/R/src/gsalib/Read-and-delete-me create mode 100755 public/R/src/gsalib/data/tearsheetdrop.jpg create mode 100644 public/R/src/gsalib/man/gsa.error.Rd create mode 100644 public/R/src/gsalib/man/gsa.getargs.Rd create mode 100644 public/R/src/gsalib/man/gsa.message.Rd create mode 100644 public/R/src/gsalib/man/gsa.plot.venn.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.eval.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.gatkreport.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.squidmetrics.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.vcf.Rd create mode 100644 public/R/src/gsalib/man/gsa.warn.Rd create mode 100644 public/R/src/gsalib/man/gsalib-package.Rd diff --git a/build.xml b/build.xml index 60c678591..438e9c90c 100644 --- a/build.xml +++ b/build.xml @@ -1089,7 +1089,7 @@ - + diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/gsalib/DESCRIPTION new file mode 100644 index 000000000..6116e8c66 --- /dev/null +++ b/public/R/src/gsalib/DESCRIPTION @@ -0,0 +1,10 @@ +Package: gsalib +Type: Package +Title: Utility functions +Version: 1.0 +Date: 2010-10-02 +Author: Kiran Garimella +Maintainer: Kiran Garimella +Description: Utility functions for GATK NGS analyses +License: BSD +LazyLoad: yes diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/gsalib/R/gsa.error.R new file mode 100644 index 000000000..1c6a56046 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.error.R @@ -0,0 +1,12 @@ +gsa.error <- function(message) { + message(""); + gsa.message("Error: **********"); + gsa.message(sprintf("Error: %s", message)); + gsa.message("Error: **********"); + message(""); + + traceback(); + + message(""); + stop(message, call. = FALSE); +} diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/gsalib/R/gsa.getargs.R new file mode 100644 index 000000000..94613bf93 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.getargs.R @@ -0,0 +1,116 @@ +.gsa.getargs.usage <- function(argspec, doc) { + cargs = commandArgs(); + + usage = "Usage:"; + + fileIndex = grep("--file=", cargs); + if (length(fileIndex) > 0) { + progname = gsub("--file=", "", cargs[fileIndex[1]]); + + usage = sprintf("Usage: Rscript %s [arguments]", progname); + + if (!is.na(doc)) { + message(sprintf("%s: %s\n", progname, doc)); + } + } + + message(usage); + + for (argname in names(argspec)) { + key = argname; + defaultValue = 0; + doc = ""; + + if (is.list(argspec[[argname]])) { + defaultValue = argspec[[argname]]$value; + doc = argspec[[argname]]$doc; + } + + message(sprintf(" -%-10s\t[default: %s]\t%s", key, defaultValue, doc)); + } + + message(""); + + stop(call. = FALSE); +} + +gsa.getargs <- function(argspec, doc = NA) { + argsenv = new.env(); + + for (argname in names(argspec)) { + value = 0; + if (is.list(argspec[[argname]])) { + value = argspec[[argname]]$value; + } else { + value = argspec[[argname]]; + } + + assign(argname, value, envir=argsenv); + } + + if (interactive()) { + for (argname in names(argspec)) { + value = get(argname, envir=argsenv); + + if (is.na(value) | is.null(value)) { + if (exists("cmdargs")) { + assign(argname, cmdargs[[argname]], envir=argsenv); + } else { + assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv); + } + } else { + assign(argname, value, envir=argsenv); + } + } + } else { + cargs = commandArgs(TRUE); + + if (length(cargs) == 0) { + .gsa.getargs.usage(argspec, doc); + } + + for (i in 1:length(cargs)) { + if (length(grep("^-", cargs[i], ignore.case=TRUE)) > 0) { + key = gsub("-", "", cargs[i]); + value = cargs[i+1]; + + if (key == "h" | key == "help") { + .gsa.getargs.usage(argspec, doc); + } + + if (length(grep("^[\\d\\.e\\+\\-]+$", value, perl=TRUE, ignore.case=TRUE)) > 0) { + value = as.numeric(value); + } + + assign(key, value, envir=argsenv); + } + } + } + + args = as.list(argsenv); + + isMissingArgs = 0; + missingArgs = c(); + + for (arg in names(argspec)) { + if (is.na(args[[arg]]) | is.null(args[[arg]])) { + gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg)); + + isMissingArgs = 1; + missingArgs = c(missingArgs, arg); + } + } + + if (isMissingArgs) { + gsa.error( + paste( + "Missing required arguments: -", + paste(missingArgs, collapse=" -"), + ". Specify -h or -help to this script for a list of available arguments.", + sep="" + ) + ); + } + + args; +} diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/gsalib/R/gsa.message.R new file mode 100644 index 000000000..a2b909d3d --- /dev/null +++ b/public/R/src/gsalib/R/gsa.message.R @@ -0,0 +1,3 @@ +gsa.message <- function(message) { + message(sprintf("[gsalib] %s", message)); +} diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/gsalib/R/gsa.plot.venn.R new file mode 100644 index 000000000..b1353ccc1 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.plot.venn.R @@ -0,0 +1,50 @@ +gsa.plot.venn <- +function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0, + col=c("#FF6342", "#63C6DE", "#ADDE63"), + pos=c(0.20, 0.20, 0.80, 0.82), + debug=0 + ) { + library(png); + library(graphics); + + # Set up properties + for (i in 1:length(col)) { + rgbcol = col2rgb(col[i]); + col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]); + } + + chco = paste(col[1], col[2], col[3], sep=","); + chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=","); + + props = c( + 'cht=v', + 'chs=525x525', + 'chds=0,10000000000', + paste('chco=', chco, sep=""), + paste('chd=t:', chd, sep="") + ); + proplist = paste(props[1], props[2], props[3], props[4], props[5], sep='&'); + + # Get the venn diagram (as a temporary file) + filename = tempfile("venn"); + cmd = paste("wget -O ", filename, " 'http://chart.apis.google.com/chart?", proplist, "' > /dev/null 2>&1", sep=""); + + if (debug == 1) { + print(cmd); + } + system(cmd); + + # Render the temp png file into a plotting frame + a = readPNG(filename); + + plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab=""); + if (c == 0 || a >= b) { + rasterImage(a, pos[1], pos[2], pos[3], pos[4]); + } else { + rasterImage(a, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180); + } + + # Clean up! + unlink(filename); +} + diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/gsalib/R/gsa.read.eval.R new file mode 100644 index 000000000..f1d49092b --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.eval.R @@ -0,0 +1,83 @@ +.gsa.attemptToLoadFile <- function(filename) { + file = NA; + + if (file.exists(filename) & file.info(filename)$size > 500) { + file = read.csv(filename, header=TRUE, comment.char="#"); + } + + file; +} + +gsa.read.eval <- +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .gsa.attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .gsa.attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .gsa.attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .gsa.attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .gsa.attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = .gsa.attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .gsa.attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .gsa.attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .gsa.attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .gsa.attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .gsa.attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .gsa.attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .gsa.attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .gsa.attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c( + paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), sep=""), + paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), sep="")) + ); + + if (!(eval$CallsetFilteredNames[1] %in% unique(eval$TiTv$jexl_expression))) { + eval$CallsetFilteredNames[1] = paste("In", eval$CallsetNames[1], "-FilteredIn", eval$CallsetNames[2], sep=""); + } + + if (!(eval$CallsetFilteredNames[2] %in% unique(eval$TiTv$jexl_expression))) { + eval$CallsetFilteredNames[2] = paste("In", eval$CallsetNames[2], "-FilteredIn", eval$CallsetNames[1], sep=""); + #eval$CallsetFilteredNames[2] = paste(gsub("^(\\w)", "In", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In", eval$CallsetNames[1], perl=TRUE), sep=""); + } + + eval; +} + diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R new file mode 100644 index 000000000..9b3ef1ad1 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R @@ -0,0 +1,64 @@ +# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last. +.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) { + d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE); + colnames(d) = tableHeader; + + for (i in 1:ncol(d)) { + v = suppressWarnings(as.numeric(d[,i])); + + if (length(na.omit(as.numeric(v))) == length(d[,i])) { + d[,i] = v; + } + } + + usedNames = ls(envir=tableEnv, pattern=tableName); + + if (length(usedNames) > 0) { + tableName = paste(tableName, ".", length(usedNames), sep=""); + } + + assign(tableName, d, envir=tableEnv); +} + +# Load all GATKReport tables from a file +gsa.read.gatkreport <- function(filename) { + con = file(filename, "r", blocking = TRUE); + lines = readLines(con); + close(con); + + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = c(); + + for (line in lines) { + if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { + headerFields = unlist(strsplit(line, "[[:space:]]+")); + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + tableName = headerFields[2]; + tableHeader = c(); + tableRows = c(); + } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { + # do nothing + } else if (!is.na(tableName)) { + row = unlist(strsplit(line, "[[:space:]]+")); + + if (length(tableHeader) == 0) { + tableHeader = row; + } else { + tableRows = rbind(tableRows, row); + } + } + } + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + gatkreport = as.list(tableEnv); +} diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/gsalib/R/gsa.read.squidmetrics.R new file mode 100644 index 000000000..39fa1ad32 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.squidmetrics.R @@ -0,0 +1,28 @@ +gsa.read.squidmetrics = function(project, bylane = FALSE) { + suppressMessages(library(ROracle)); + + drv = dbDriver("Oracle"); + con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD"); + + if (bylane) { + statement = paste("SELECT * FROM ILLUMINA_PICARD_METRICS WHERE \"Project\" = '", project, "'", sep=""); + print(statement); + + rs = dbSendQuery(con, statement = statement); + d = fetch(rs, n=-1); + dbHasCompleted(rs); + dbClearResult(rs); + } else { + statement = paste("SELECT * FROM ILLUMINA_SAMPLE_STATUS_AGG WHERE \"Project\" = '", project, "'", sep=""); + print(statement); + + rs = dbSendQuery(con, statement = statement); + d = fetch(rs, n=-1); + dbHasCompleted(rs); + dbClearResult(rs); + } + + oraCloseDriver(drv); + + subset(d, Project == project); +} diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/gsalib/R/gsa.read.vcf.R new file mode 100644 index 000000000..5beb6455d --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.vcf.R @@ -0,0 +1,23 @@ +gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) { + headers = readLines(vcffile, n=100); + headerline = headers[grep("#CHROM", headers)]; + header = unlist(strsplit(gsub("#", "", headerline), "\t")) + + d = read.table(vcffile, header=FALSE, skip=skip, nrows=nrows, stringsAsFactors=FALSE); + colnames(d) = header; + + if (expandGenotypeFields) { + columns = ncol(d); + + offset = columns + 1; + for (sampleIndex in 10:columns) { + gt = unlist(lapply(strsplit(d[,sampleIndex], ":"), function(x) x[1])); + d[,offset] = gt; + colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]); + + offset = offset + 1; + } + } + + return(d); +} diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/gsalib/R/gsa.warn.R new file mode 100644 index 000000000..7ee08ce65 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.warn.R @@ -0,0 +1,3 @@ +gsa.warn <- function(message) { + gsa.message(sprintf("Warning: %s", message)); +} diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/gsalib/Read-and-delete-me new file mode 100644 index 000000000..d04323a6e --- /dev/null +++ b/public/R/src/gsalib/Read-and-delete-me @@ -0,0 +1,9 @@ +* Edit the help file skeletons in 'man', possibly combining help files + for multiple functions. +* Put any C/C++/Fortran code in 'src'. +* If you have compiled code, add a .First.lib() function in 'R' to load + the shared library. +* Run R CMD build to build the package tarball. +* Run R CMD check to check the package tarball. + +Read "Writing R Extensions" for more information. diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/gsalib/data/tearsheetdrop.jpg new file mode 100755 index 0000000000000000000000000000000000000000..c9d480fa05f4acf066e3bf1cf469db47b8a1afc3 GIT binary patch literal 50343 zcmdSAcT`kOw>Nmm8I+tsL86kgK!Zw_tbjx%2gx}$K|r!(1q2k33@SP2B$65=$3{eQ zw?NZC(|pJ0eeb>Bnl*RLH+RiHQ_bn~qq@@GRl92MT~*if*XzIyO;rt500##LPzOK2 zH5^Y*F_Vy{1;pQ3lIOrel`w&^Ui}<#Ld>p4FGV7 zK`diy=WPRGB@o-&dfT{y_-{FyuCDH2`M4(_=Ckqe0`uarf|&a+U%)pQ+||JBSTH{A zzv5ni?PvhG!1%w#@n``6H5&jl;rv&+WBPx1g+u?%+g@DkpW<-Hy#Rm!eSLilrX^|x zfGe-->&y4o*H^`09SZ=^?P=rd<@ZlokPw_P0Qi6U|6er!t8@UE0%-ulKYjFnkN?G& ze^QA6Kv6dUJP8MY$Kd-N_~vB*fSbVppa+2QN&v37_Joyg{JOWgJ$KZz(Fa*Q^ z2|xsp1;D2LtAD{%z>O*ZD3lSARCzimN)`S$=6?@9b^-tJ?V-CzpqG<_qYtMtIJ)ib za%#BQ2;Jor6A={!t~Y>il>jGu0MOP31i;+H009m)C^>(*4e}d@8I+@cab6RL<+(O*{NI_6oi1!~Dc=b2!^%C$9z{SD&`}gmF zM}YS?5aQ$G5fBj)5fKp*5)u)Ukq{A+5)%@VP>_(4k&%;=6Om9*Qjk-EknC?Ef3xG^ z{>@83ModUd_WwFucLO(xfmZ~DcsREK+?zOfH*v1}0S=IQ0&oWUI|=KhuHnp;|X`}zk4hlWSK!KP<^ z%+Ad(EN;Lze{TKS{=Kt{{BwMAiaJA|U;L#D2f+ImS^r7d|AnrbAYHg%AK(-Hr3(kw zAAIm`;uG8wC8So?Cwk#Yb6YHenD#;Hhc7)OT;c`@I%}^fQhIKQjXTJ{r2RwL{~2MC z|0Bx&ld%7yYY`yF!vQ-F?y%^ zC12}S58UFGI^;7<@420k3QLY+`wa{Rb{ENT$x}JYozgawgv%0!->88T$Y#(azEP2& zDBaU)!cQ>;h?jEk97SSvoytMP-cBv#%>q&Bs4e3t-_ivTmOe*IAF_J=hnD?x##Q01}Dmn$0^iu=1<*v%)mh z(BA-;oBGMlW20{Fi|Ika0E`pyL`akpj%$kyE$_I3(-T%^<*W%>K-{JpIAHANhVhTE z&)nJy_ASIAxz*(PATOVUwm3P%hw(^>P<)(#sfTGjSbwgpr28?>umlk8e}n&fVV-Ry zf1xYH)S{U?XcACeo@7zrTd99O?N1r3hnqe;#`1N(_$A$h;c5m&zkeaYK@KHjFw9G1 z$eB{}SNM%YX^A41&rVXCIXK(0drHKLFY-=GSqo|3F18$Ls@pOBoCGQ~)=x`bMC}vY z!b_a`tcOdjt8-HQC%-gaPlR9Eb?cCGEKT^VB@lq?*v~I-Voy`{!AKFw`4gSdtsK~ygJdyskP>O$Eo*RTzBAfBq(`v(` zwb_D)OV!SX(>RffYjN4N=CDPv`s1d55(QV^pVpD0Gt=)Pn;L+$Qd+>3)1i@%Nbmus zXixd{q>wQBZodzXQ_h%1= z(bVhV<}<97@Uny+$E!Z~)Y?kE2226HmL2Wq-aUJoH07lpa-@LMgQ0311$2=t0FO#v z3vm)nQ)G}&yCOv1DbjRalClIQtEP>_JcPpeUuGbkH71P z!^nyVf@Rzdnr>zzkOnIc2l6p`fceE~;ZIcYreZzN1~x)E&q3m#eVeiHXAW>9F+wvy ze`HU|Qf?Dbq7bkh59lmUKUrr-mlFKEm^R7VoDFA9%0uu|;Z5q%&=L%7DpA_*Bxfw# za9~aT6+^_8f6}HQgZT~M_kJ^s*`;v*ZF%e2^lI)NynMa2q%z!?V;e_%SzwBkleHceZUb5(bK)HVE%p|Uj<48%jo7(L+ zO*(-`s;WA&h4^HdMKYTmd@pk>J(X|_l!kG0`q*cR!-KXFAEkb7mKWmQmV-qLMoh0p zMT~Kt2NdGvc`xRLIUet5b5Z12d}t*HdrtR7g?4GPolV`eBy5NMc7g%G&V5#+l9Bx0 z>1jOmlZ`UGHu5|2KBZEIiTN%$un;}e!PeODY4xSq+rvp(&R-F~rH-^?uGD=jR>b3b zezXhU`*4rxTd1dnj5&J{DJ?FgEZgs!eabd>`twbbL-#(wLU^)9TCJI4t#PV-;<%m< z7b)xn|2&ZZe%zKpRYV^|Xcc7BmINoOsCeG>xP^TqL%3+369oRcu*2!;>O+W_gMMA}cr;ThlG{^L-1Nob!&0jHvG%UWroe2i$V zz!}i%j+#HcfH~kvRQ}n{r*d{750d~|mR0Xh0{0U2nwhpC(L(T8_IdzUs=!(Q%zIB; zAqaLz0A&W8WMBHzZj6#X5TTTtYKx=*BsB>0`|y@0ZAb73!#%u}h)4g>{3w^9iW)AZ ziFYI8Ko%0+Y<7UX5#^nZ7ax)-S&Ctg3CEWmclmY=xV?X<59_inNXRx-88~RHc3odF zHRFG|k_P9G&MRT&(S0lPoLCT)ywit$>=Xe<7+R46^z+&zwLEAqSv7Qr{Ks^palfI} zs@1!(B)`5H=n6Cy^HcgD8tH2ih17z}a79wZxd&VW!}ZWU>k#v6;A1J0;~F3}Tf^Sh zLFDIt3JH5OEi7DhV#Ugl_U+=&)0iPkNFT*a)q@_7pu78z+&q6}$YwUX(*+nBBBbMl z>qA|1+hGl^Qua-4b#2if@)uQPY<-@Sz6oZ=h9@yoy9tqARdrQzOr%v?@@r@}tR5Yn z-iM-J>LwJNJi&5Z0~=bw*T9hDH4xK}Zn*|jbm0&zZBrM zYX9W_Fk5H$RXAil9>NH9hkV@=F`>dolUg&it5rW3&L&x2=;>QN>!bLBP=3QMm`it; zo-nu4LRIt-hAkN0=~ZJ%E-QbrTp~2mwG&j=laA^^w{b;V#arE6c)N}R*MezW16>WM ziGAa10RP8T1H`4j?uRfY5J|Jq^~%j-^9hpSwL3iDaROGrD#%{U~vdO_Z=2X& zyP#h(?{uIe+G|_yz6lvm!aPDI2lt)(%%N8O<%1be1b@iVH6VCiU>Y`Qg)qGaQVKZo1F|irFgHJqL^RRtHQ>hRPI9C`MZZleR40SC&b@zzgwIY8{ZGK=T6i3nQrxM1!{N= zeD&w}I@AXa@5#!NYhZ`_={jkZzqfIDdC*Qh^sH#bO4);x(&jH6B|k-^&&gdIbo|r zu-r(JAy2J3XPTKD;PEAl@NQAgQ$|ZCU=Y|*qTC4F(1-PYb2gKO_?u65&dDo*R0xwI1(iwQ(k^1u5rE z;x^&M>!}7xEFbVz^Ys)05#toXci%gafXs;&7NQKx0G3gE4}{Ee^rPX~v_Rf^nw;OK zC4*p_RN=F>cH-+2f#R5u-Cxu&3Q&m|`6mB>HBkRXxS&1330@dXCh0#Y6%M6TRdN8d;Hpp^gAVWYCEv(Z7(@Zv<7c8;Ie< zk&0oH^VVJJzA*GqmO142bA1q$u0&E0q%@G>tyx4i#>v>t#)q>ktl;#4O$A`(=cp#CE&G(0 zwcQ~bFkDDr`4$qCj_0OkYVN%lF#)Lt@D49w(wZcj)p$7ofbY;DgmW~;3Ixa-Ja2kT zRSod%%EB{}L!D}HKM&kZCZ3Q0-VQqH+5YiX572p)uFQxxM+-zZ(nM%d;qkl)UWZ)+ z#1p1~3>W8#1Wp_G)bggxfWR9yc|AN`;CMCzxZ%(NcrQkZt^cyZrIZI20CQkjZI4VeO#^-~${kEdJ74QSF=!|5y%e8;$+ zvu;a>Z8F}@QT5zYqOv)*VfFoxYS-dBedBbw-@SgHF62}gdG0^@lB3+sYXSvX^GJzd~}yOY$ZgO;0H_zVXEbh z?=>Lvj6D2PKvEn&F4ar{J!HLtg9LjcY$~UqS8D;IUyf0kzBA1w$DZ=R+?hd|Kdfp; z!uk%^NSvV$?8}pcAY89qVj>vFK)81SEHm5<=d3mzD|ij`*oIMu@FIueed<189wSxL z*I9->BmLtrlIxc%#!84PWVn4Y3PmeWP5s7}m}B(!JZJnN)R6qcEHTTdad(W{T3S`iqk_##`8hFBo9mGBT8^b+CPI3D z^coS(|e;+%Yt#Y&{9W7qj*X2v(%c7Lo^?oW0D398K9Pv@H6Xp_Dp7 zdPH^5fC~d&ordGm17)yQIdt&lJ&e8;-b8W81LRSX^>oaa&<_C|ebRELzf!Grezdlm zsLpe7v%0wsG2x{a5L}f*)*BafV;Zi3g*p?oaVffP5KDlrs~Wi~4J<~>of!mIK8oLA zAD_7$!~Dvh{u)qdB#)2(Zp=DK^jmh_VU2c8geGcQrZy*x2|I(OMmY2zEgBmmg$J~C zww2HhP#eWMyQ-a02b=Y`xjM-0yhiCHUuY_0v;Scn#Me9kpFIC{$hptO~K^pg3HTVy_F8LAiqIBNu zx2{bI%RRZ(Yd}vu(NlUMr1zGFNABD7dbbJ5{p>JBDEiJ1m}SV*tHLm0jB6KiFG;W$ zOIM)W*!9D`#^&m0tZx0ZYt}Q9TLcT8D(1G4s!EEd&HADCy<#{$OnZwhY!7vE293lb zRfF>2M))=u5NUeGT+q? zzmgZ>zZ6~cj7IEX$z%~$@S{Ww)5ngK{s{`FYk>Ob#dHV@oZ}{%!vIzfp7{5`=Ey8uzZ-n)?sj+|5gzIeF8qzFd39Fvj z=h-iz(z?FqyV{F;S@({G9Pnpz$~A>V8^1c0_`S>RhsC$0sZDQ2kMsLsx=$zQK%uKe z^YLQDYcX_q{rNOVf@>iDbb@jPqcAP(Lq!$2)w~ZEpcn*!NB@bOTRY;0Lr;&z!f@DkpQ}dxdIjTp-Oyg_3~}4+SP#O`fRI zy-O@3)WB`^roe-)xK;rZcmSQjhCtLa+3zfCY7g6 zQ*z6vaPj1KdzMs)U@WQNx^TiZz;H5wtMv)AoqF;G=MJ{pr|yQBZMdw^?tNDJu$4Bo zoyeVgGm3WDLsmeq+-UHA|v({)NkH~j!-f3DCKBdmdcw=hv zZ0SMUcDi%))2F?dPQ5LcMY{C8xI15vwT`r$GwdR+u@4fdsR+{p0^sO9o4!s>5DkTws z8H{kVc%UJ#g>94>4YZ}>_2BffmQj-|QHGaK=IyA8WE6i8GP!(|XXtvzKa{yOd%@M& zpqB95Y|g6tOp?hkhmlUtJ7v!bKOhAajr1QP6UN&EogUabPQu6lEnfs^`jz$#e*N;p z%_qZ3`clnkre@+WE@C}3QVUmQoBO%eEuN|n=Bp}6UA%zRHT}tU6(m>x8r8iJr~_hX zfY~9%ratQ$cr+`N#clk#v+2$!6QP=W2M!vEub;jDkQ(_*x40n21U;3JY0?@8@n5ZT z)?uwMs!PwwxIK5cCG$Z18JLvDhi9)q0> zBSGFqpACNA(MNl^lcBA8vFy3eIvS!+X0tAQ;_<(!{E{cgQRNUXGf$64t)T-MdY?h< zgwYlxwdT?hx4UAs+FM*SZa2dYi(8Us`h4*tOc^xn^ftm572S;S&!45lE*#=)EcT9j z)G||2C)&=u5ArN0z9*UfYpZcwKP@4os@Zx+6MI@6jCud=@SsN)673uiHn{c4`0?xG z9V>--Xpc6Q+62QCHI#M@^g1ZMt8!UGB%|F%7Fh$sC1HE92Vi$|?Im7mO7v$W!Fy#*#e8-&c* zTgq~ADTZ-H)qUq-|0K{!TI|%bsNGXz?8^`;QJ<3#%rD8{bf5kUZSl;L%!LC-%J1oE zGCMlU#yhK>Mu<5Kz4-cC+$of*D;r6rgD6dCAoi)o8#GOcl4Q}PaA7bQwuzT<|M`k< zbv@djG!{RGlUnX*kpoMrg4jlRpuaFgxECTz<1yxURVzb_(Q19hIj7?(wmJl3@WrA) z(KJf&|rq* zkZs||ee)SVU&Pj{@QXm(YnfBeRp#xtj~^lG9W|@fHTBb8@gDvW&~98d(2)C*FpGpz zYYZY;?fCET?00Q|KAH<>C3Ts-_+P$PipSsY9gA_GJS{ECp*g1uSvDD{|ULXW3LIWibH2hY0=)I^CebSyns5ws1Gc~b{5 z(Z9kqcnwgU*!`%%a8;od|EyX5;o*@H`uKHGyPy5}{d}>W2coXc+apCi&klU~@6tUg zf;jNN=kCsAuv*L}i)GjdZ@Kc?PZjmUtejk-sr!$mkdtS$mF#fc$8&?wy4Zs?$idS~ z5$u?v2-50xyA|T7A1jLVZA=QpSELIL=3ugD$g5)sNbE~s;1Fp^OzosDL^v1hBf)9O z$ejj#hDysG>|{eWCT&nf%~4w|E-wc>IQ+iggjiDg)93StoUAd>^igzUo2oQUy?w{B zZgdNZ4gS2`TPJ!3X0WytjQ?@j;Qe;Rmb=y}%t*{+S2uGF`lki;SspaLh(U*|A{Ta+ zoeHgwSnBB%oD~xO2$T0O-^tO*m|CnG;l4-^SRzvXhHGw{p9%Iv9WRDf_X!$QatUzJ zn}{K!NqPO!0GA$)g|7KIV@sx%y5#QAEaSG|%WM`d4NDQ!{^g@Dkt5CCmXvr=buWjA z;)h^`SDD1JBW=}O2DCqmR`oO08(Rgtc#FT#Ztx9sSe;8>a9H4_giL3Ok?iH1$M#tv zjD@3Cd67cz@V(IAM*El#3nkw34*GX8b~GXy%l*th4p4qVzE0jYsQCHHTE@mo$2Qz0 zN_qn`!I^RE-HTFa5rq=TZhiXaWQ{3IR=xzCk|`^nvd5Y5L?w|F|uk?t6}?G52q zl2=xtO)00sBxr9vS3whGP40%-XcxJYF<%e2Mg#F=&CR!haxqV3Mt(Y!Ull^vi@Qj> z?7LWZ@>!9DB^4`sL8z!}fP^y1sc0wN*=x^(DT_9F>^Zr8X2Kz95ewb&n+o-lYhcFm zHY%g6sRO1?&2<{Z83CTp<%IpX z1`v9#vJjgCJX9Xq0?u+^+-*fAZ%_$0lZZ>>xbb_;|0*}MQHn{JOub2OShS#SGxJT< zK+;CY=Dl7uj`-du(Ij4rUJ3dkBh9-%I)3IAt;$taYbFJGXWzBWRh(Yv*~F8erlXRd zq4)p1o8@6{_APyB`SvL7>!mCeu3Hgo%J{4^m#6|$g!ZFxNG3~g;>A=Vr3y|RES#1# z@mN)9FGqV`hL(PDL4Fc9Vn}y9S>tGcm-`f+L5)*LAZx!XI0RY$(4R8TyQ#O0ePrQs z4XE4KK3^pn>Ih`O&{9fdqiE6J@4OkH3E+B0x#9nk<3%nn8*+~BM+B%9lwj|i=QSUl zId^SxBoCxs z_!kF6&i|=fun-;q=fd2VdW|5TU{`JQp2hdKEJTT`!<2>Ot&0pGb0*(7zq>;8J%i-OHB8NW05_ zbNqR0CVJG9Z1!&BDwc}$MPo}YFh5;cG zq3?yJRzMBgZ1m3`Qo-3SJC1o9cmCQd+8Xf`*}h(e+S9T>cw#X zEmfulyh|SoYUVwi5JHG86_N5LbTJ*Slj8U0#hSME!%IO`V?0Iu_wmaX(jSMmX!2a$ zjzlIz0$qMq3QiQ;R27V3_L=-t62H$W$Skti-Mifh-SYk;yuQ}$&WxR0y{k>uy-GLN z{;X1wq!G4X+~U|HvvN!Ck<0FPCAP7!xUf+O!ews2Do)V7EJP0}lz`aQ4H2I=ji_{f zyZiLjOU@JpX%gEZ-_r{+cnXG9`cg4$D4)iiIBdd-V$QO${#!pP%can9=}U_ETg-WG zV%pRP_oL$+v@{A-8Y9tI7_{ES+-zX;4uU>aO6d!tn5QSh?;6m&J10A2u8DI}%WhJ2 zBz_I}7nk+5pJuO2Pu0HLS0hW^f*#RX1fpV6FE20!!&f?pSPZoqN)RkF$Psl>+s_{& zTekM7ZT1>~6E={=4ewHq#4GbEFnsenNMGBviZNacD@RQCHOXm&?-(=N)i?mgfFkFj)R)ZRd+=p)_g6pJd>R(FGaQ5B+kSa(}!Zxf?oCyQ_uL*+xA> zyA)i|`E*@z$9F!A|5LFLD)&?ne#gMTJv6gTux>vl2mONGiuIsN?m!J%O)eG$db7u=WKf0F$~|mOxQ8!;^p?W{ z3JG|*_LTLL<>IBb%K7Hk zp!9g7!64s4K+(9=)UdnAjVW}8WovjZ9-#UzQ!==Pm&`Akc`_yp;H-wzBnB}*2!ee* zq@A9jJONrZn_%I~`68mT(M(M8)9oFj=t*3HajFu&m)o)!zHu2;u^Ui`CxdEA|+yj$jS0{(dMPk#p|gPhSP54mVNrdmFy%P z>3O8NKqOlTlpQQ4+MdBv1DHw@NI#SnRh)UDOkxL_zpgY=8!ny35O+1b#nUE>+V7-{cJ;5T!BwO$H1K0Z++@Lr5 zc3OWVgxO38uR5~~N<~damX5b)r@iICt6L#oRPT*`m3Sx@yRpXhb8-iUu8~@A=7cw{^CpDwpKRmTjDH zjol;PmtvpID}nAv99FgccORZDgUf>FHQ@4IN6WE5uO879>G~nB_ULxJnaf}*&OTG& ztJF2?3*)7!JQ4Pp0+1<_L_gQ|2`;@bLf}jZ`rmpWotz;Y=z+?4M=Qp5hH7*gg;noG zKU@8g%)n*i*kEWJ*e)%IO6@8+nDQ5B10`)HG>=T zEZ`0`{T~-*XhInI&Z16gdEZN94?GY%QEo<-U~fkl&_0Fo&A_d49fy6xQDnPRf%ZJ= zWWs>X?nttbJ80#N_pYa1uT52ABkQfGf4_M)hN>l#dLLRk_FJfT|bJ=$#Sa9EcK;~V`Dklvevtw_69``TyELVjt~)%|uk#l8B1Td&v- zennQON%mtW*Erg5xg$1F(&#l`#G!Bm2YKuu*^1>K!AgmGt^2{bS|_pk?a6!@N^bKc zU%Lxd%k5>y!pfb4>}qPhl&q9Az#hRLQ4g|mB`4yr-rO@VH!Fcjczsk)`C($qQ6_({M&`8~(= z5L|+|xi9fbvffqXph;6tKmiphK^|6c;KEp+_DMby)lpYRJq}c9LsuV7wWz- zW}xw?B3l);8sIv+d-u^Df#SErtAy)N?K9+QnIwhcnn_*O;_fO-LSRN!s%8XCDJaIU zhr<-Wq$0ADrZqobRbmxF&4pv~+*x?dkeZ${}@xRo|7| z{rWXYxane)v(4Sl18ieGp`RYsAKZa@`5eftsZGNh8)8^o0vKL;^hZ!jvSq3Pp#p-s zR8FWM^F|MoXAfP(#Yf0JDu!;^WZf-%o)&a5Ni<@G&>r?#y9w=4hPuDkk*O{TH5_1P zPb%qF3=!Nwzhh9CJ4?HXMb^fhx|1(~{!4_tT5Qch>IavWFUhuwsmDEvZp%Yj2sjPX2b5Dsi@r#Bl|1-LX9atAH#DPU4L=@~Dk*nHvK`7}! zZGiZIqQcZ^){(;GyV7H-8tdj*qSp1Z+68U1hbj81>m;H*10gSvby$)jxYk zca+G^i`E5AQHRi46URnc@eIGDzDOyrwEdRRrBlOu{~T!& zAAh6NfX2;y_KZ2RfZO4ot4rCpxsF6p0iBGDr%Pr|7Ykv$V{2bxV`{tB-&)mAyaIPM zN4pr%YN-6FlVNLQRwcPh@TY(@mKR$)U%wS?)#vaGwA;Q(%%y#PpMw{HfV=Eaf7U9LuiB~n;ZBc&b4uA&NqeT50})YzyDUi%UCAbw3?g` zlh{~nUa)6Mexd2hQ(h7G;A^Szf-Il6K|X1Tp{=O z6L6`KQ51$wtzgVR_zz9m=~$8mq}MMZ4TwuYf-p;DRdwmJ8{G(H0+(;yNDj&kWY}t_ z;;WrYc?D@o^QYCZdAvmd4oEMF^sak{=Rc-lTGX3%sv0Y8t87oZa=)MZgxDj-z>`>A zC^IBi*rXyadZEN*_hL1-JRt3Ge0-u$PW#|j>RqEOzX=CdT8LA=YER+_S2!1u+eTX~ zS`CrjUr=|#TzlZ;6kHLoq>eU2+6?$z483w_N;V7SeUPh{B{+JZpuXIc`sFz1ak=B1 z)2$namj+a59^rxe_laGtuc{RJRb1Tb2S}Za$XFk-CRH%t5v9g|hOFmtFic=p)&{yT z3=2@CPd9cm9-W7? zw3mH5Yn_$5e`nSW)nxb$%M-AqfzG~k!-OFzqoD2;zjL?T8> zF1|Km(+t+hdDH}JSGKloa`d%eUe};DCk(#D)9<=uC#r5^yKRV)K))zLx53Ag5Qn4i z(%3Nm0N!3jp_wo)vNf_D!xhEa3Jw2z{S?)^Q{vCmxW&ewalVSptcF|D=~V{Vonfet z;H3%G+L=cOX2=KFRdcuaZOzYFQWurC=%#ic2+fp)EU7OQ*tr7wd$S^R)Cxj<&JmMT z*%6kF!f{{a(3%@pGa%Th12jpBO*lG?Qri=vVw+10P~<-E#v1{${%Eev#i zeq(%B%B1giR*kp<$q6>I8tw;Mn1D{uy~^ua{|LHcsRlWi1)qiRBRXndhd@HBHCR|6 zq*iNYkFuthX^BQPA|;!>aBIS>2?8e)6&3m}A3=MR6!|gkXu(elA-agtUia|Dg4oi| zhPXqYHCj+>Y1o&0x3KjWIw`db!WTQa|`GPs@_!C|=0mtYTy5rM)$@_u>U!?uw_?-n-QKv@H!Kg`rG9XlNc+@+YxqBUfD1Qrk zAN0g7R=djJ(~eGk#b^#0#n|{&@rJMdy(i~^>0T{Y_6Dx{PST+p!3+1!+uJxO!lti* z`qflqI*J;hSO#LnFPkeyi z?K$yr^LdM&@|a&ExU4>g#v6H>t1%o{Ayv8C{G5JBR1(mLdwgFP{;um4G+hJgPjA;~ zYcVMh)Ms>w^n$qt(Yq28>9oG^k<4@TQ@RtT0(wM@Q&f4DE)(&_uZm9}D|$6_N=PfS z({UJup1pwQdSuJn9v$7elW{BIRmN3%Ym-Swx`jhn$3%)7M#~j`guc@?v|`=&xP9uS zWXj@WtHlM2#fIvEc7r{+C$GOg=$6oJ$jIQN<7H&5ZSGos-}*|51EB|95S=j5uU^Bzc2 z=ij(mK}C3Cf^7GuhsgJGWn*vuMgj8*!(Ppat9RaJyNw+`&W}okJ4*QA{NKcUEVK5g zj-bJpYZ1q1efVe>2!`6bPXwb2dh+H}5S<0cK9j7Wflh`N`ZEs7BV(hkOR0*(hs1@d zeu-byo{A@it8@K;*nkHg^Y5bHT^e9i(Y_xsPZ4Ja3hTcWnO4lzHb)h3XO|2n4_DoV zDA-*ed?yyd{1BE}I`CbBO1mf({)pQd8BJAmc7tl=Re_RL8f}z|$ZpP&Kz)QqggZ zB$X?N zhOV#02ZcpcFBRHlYjwqHcR1bqqzY8Uf4#+tdLBIIuz9k#YXkm*aC5UXRqgO#gyHf>Df;!PzkuQ!F*%i}9wm%KI5)G>&6p+4YENWLe9 zEvu#DUWaMaAAQfM)6Bo6!mDzH#9CVYIxi3in_g?0=vjx}m|sti%-|C)1{PjE=_6Islx#ZQKv<~UfRlY!!FLSmE0QC+Q#7nQj1sL2 zy?lVtL-I6K&O3!M7p(tE*hr#OaM$UG*f4JJaiPs$y#MTlklP)f^B2qJ=@YxrM?+Q= zdr%t5i6}Yu*SrvFpEc4XyUk!HsITiSDdh4yRN%{yWxI!NM~TK6O&Dqicr5CB(G_m#|5NWS`RJuTQmDOG&G!|eJ&-%n$ z&Lh#zsMsL-QKo6l`=uuciJ6Z@+C0S_#V20nwjZ68;f~PJER@-h-JbN?^zgYMD&Xp? zB1_mr7ee*GYCSftBuWe2SNNNGE~*C0u1#B8wQJh&J)57^(@Y@yy_+RmR(`mThX*1} z*tWL7u|EC*QG_L>x|Bg%4d|&!$I0Kkl)TD$!NWSgG8F8DAD;+m{;EZ<_LO(FD!e9N-LSh#BREOQ-x4q33C;Tn@3<>l9qD1rDL$vmWP2`uaK8*yRFh6a0&Ujq&t zi`m_42yg;fq(NnX8z2K$nwx#>yhw-s`?U;?x}G)?)$mY(UyCcQHlIa{LQ6Isw_f+Z zv$IKbkUt08etu6?qHa(s;S>tI``G2Kx< z|KH(=h0!mf3;H09?#Z|7+TJRBbG{H;86g^toJ{qofb8W|P-=`jul0ap&P8$7*CE); zPCpkWfi4<^QpiK##O>N;0S4E=HGGEoEhYGl$H|8OCmThK`{gnxjK?Y_bKjo?Y} z94#;1W*0(l75Y3S$qL2gVs-IQG8i{$Y{>Z!c)-%2F02Q_9QiYS-PQ3q`Sul|^KSPw z&<-BJrN&H+CSL=7l~{&z@$)9M6;k~1^j)*QPT|kZvmv+q`BSr2GTl!CO#rtli)O0u zHn{uB_X<2$yJVBj$gZnodBIES-%=bmI|YV(HxFn`e4&H^VZE!r2HBt@LvmV`!*V$sqFXd^lcMzm*Z*EkBBl5*Atim7d!FG-#qpj^hz7_5KCs#k)=E0xB+=?A-p-!_x>T=VSmH7ETFG1C@Am( z9M3`~8t@dt^tljCqNQr@Ga+lHAC6wA>3NVRoc3`2rx6C!qiiohyF?y9in-qQZ+Jw#H%h)XrC zrRDA&at456HB5;t`n?#fKZG1qBv#j=uYoVZu?S$KBzK(J=ylj^>ZCWF6og5o2V@z-l?% zD9xQZ-){4v@yGU^ZqLG;1iUX2e0J`eQd*w2{%KRxIx+V9Gk;<7OV(WA;oiuTs-~Qj zwy?Z!dy8h3U%TENLF$_@tg&Uk6=|*T!^SM}rmzeFTDlM29m>oXl;%83Z3uGk+(Cm4 z_qc8@$_g|zvC`l zi4;z#jlkK2`Rus0SDn52_$F%QHQsDjX~9Cy3@`k)?xoNTM=(WF{JJ|Cy(y>z0wA8(j6y0nB|59T&x?t9gJInq!Inv_R-8Mn7dp zqC`Y|HJivyW2gZTXMwUiEv%AENX*F$F%@3HzzefOTW(Z4k1ZBtWll;(mzBmChnU6y z(&9)bzeI=C$ahR9#RfXwO8=wff;Q=xlAqSi?Y`l8PtO1#?iX|-@gZ7J$aqg(Rm}># zN%+e4Vp0S^i2nB`&M)m>{1# z?0*sVrr}Wj;oJBidyJj3j8I6(zMGIOSt9!~l?rJ{2pKaXWDi9VqwLwT%Qp5Up{!ZL z43T{%8h6dobANyT<9{5_@w|ARm%K2y&;6NeIj{3PFD1}1A~?oRy#A;r;S{TSy#C3@ zc8q&VI^gLEyxAM{-NtZ09+ttq{7SlR`EEWKx^hu17s9!={JcvKEua>h#JIkx4k`Tc zz(#G)qNACa`2jZ|Y1rM>aMF@}I+a)XY={gD){wNalJLojNWo zHH(rL`0p*Ru2ZhHRuHoEY$B74y5Zx|5C+4e{h*_b81()9F(uB5kwTVVwg&RwAisC4 zPoz+;^@x#jA!=FMTvD5JOp`fpu#b&Df*N6yfUVgVCt*9*F7`GG0{~ zDpN0pu;K)ww#^_+dt(rhMPRamY0IMM;ikCnknKH)&LY-LfaUL$<<5{B>+%4^0Q+R( zimC`{;WgF8uwyETdoY@jP3Lzp=S0c*QdK`D)bdk1$k0h6g9?+Ap(W=pji^U&=7NfB zu>$B7e^hL>65Q>7J|Du}`S{v7k+rE@ZLT+yv~AEb1d5hdX(7FhhERfyQ%B!~7Bbtj zPG;qQ{9Pn&P*MDQts0_pDX>}r&Q$QC=eW+CbmQ;gCFrpC&^=@D0SXIueXkE%LpVCa zi=u6-CGWS`nChn{Y|Am^b%qa@oLZYqEvxY}#823Ps{fX<;47-a8Do;qTgKAqO^8YL zx{!gD^=BC2YnQmbHo9uCx9~n2QruM-0EsuXDP$N&2ZpG16#?^KX2sq&s+h(R}X}NMU%#Q;3a{E!H-vmm4xT^^%+7tW=g*beLIuAzusFS)%0iB z)ieH3pxx-(x4&Ae*g*sr{VSZk5zNRY4#{rBp``n_rb8CL%{;f|Oo?%fz9aqC2~u%_ zhfU*G7?pR^tSO5iyqEO=iC6nhMb9BO9!9_Rc30GYuk}9m!W&F;)I9{C^~elMXPY3J zM5+naP4n;tM{8JhvwnfF;xC z*){esj-7F3X`jaZW+oRz-Y!hYrF0RUpBsq>b|1uG8!@~26vuFHlN%GN>n zK(Hoh6rX)fNap|s{auSE-%g)t(mc)$UHbXdNagmSoeF1zSj?r`Jn9|cT^dMtbRhfA z;O@{A`u{d3f+X4VoPnpp$EVz8Ya4|M!tX{#>FOveNw61V&PTY9*z=f0HzSY`bPDw` z`0v(I6#;gVIfD428NoxnSpjH@?CU#fJ=H%ORkIL&MTcj&x_kmDDkD><+;9PpzEv3P z+@*@?xlU)JauL(}YmRVBFO>leGv{rY;-e#3bwqu9u*u}XTe zJ&C#jD1v-1XxvQbI)|R|lB2!pFbO4TxspP=p6eSFdDbQ$EzVjhYKWdvzJD%v zFQ4IQ4`$*LR~yWJd_8wFt*KHGKIqgu`?oW5qSVvt z^0}C$2)q38`l@>jmp-wbWP`v8Vf6`A0ljW)(@IK*13pS(Xs|$llpcF4e)BrXmbWMH*z0VS;-1x43DMyd%6gt;5jo*T+wdZodUIGfAI zLsTkmpezjl{Yn1!a;BrewF#;a6W)Axiu67(9xW-MHm8qn)9Ctm=WTdkUyj=tz4FB5m1MGS zbw=__ihf+;Lb>Mt%~qc$)0kB}X1?|sQO5AhR1I1>JkO}<*%(1#PrB#y%V|TlYYN6I zi&yJzjl>qAQnMABqR)RC11rS3oIbtUcm&aN>b6 z-k^+qntCd-JQ)jJ(afpd0yQL>uPc8#-&4KOQQzvAE!;Ca+{gWNb?p~*aT~#c@dNc| z2trsAUEO~LLS-f#%*Xc81tw8ZEIDOf^X}WvpVgF2?w3p>ed4Fb1d??>q3SoJo;*9r zI)@;dPe6Xc_`2ZqFIYj84vCZCJdlSFT=HE|U)GsBiqg9}sqd~4*HO8#6*O-Ic`a$_ z^u0_a?qQ*{w!JUo3RMU}t|gi8{#eC5{mcDiO%ERDh8cj1x&uC*B-)aSB`of~+o`h| zUp}9_X04o4clO17+t+CqPX!smK%_(r&4BU({0Qk!|3So2-V_t+1=6|tTuf>RPGq~y zlj8F23OFX_w>G4UtV)ld_yh7r6CmT1$XY$h~+H3WH5D2EX4+wh)ERv&0nkB&{ zD0AF&NcN0FUyz82%XiCrf-}|B-*;Zli%8C=sMw)7Ny~{8jqosX4Im#yVF`a~MDlkd zjlV)!76aV$Uz^75#ElKDgG%>(&1PcpZHc5mviGtXo--YZf)`Q{gAeS6CH_5rdZ!+U zTN8KasWT5ERG`k_74{xY^^sZd^)E8*0k4q446XGhyGpA~}jP z+}l|DDq-JpRs(lMNeV8LbV)ybz^Q~*FE$M>Jk+=2iweg7gGlrhecx#T+3&LPN=H%VSDTx8h5hiY zrqT~ybExh+_aSdNM(-ivf$8zp6c*TmlmD12T1Y8VqbTHp$amVS(H~4?)c$u|it`{{ zj8ix=HZ({R?#m1hbR)XdVJXf=nTrnJBQ$p1;a~Ad0%3<@ zaWd~>TnN!{uYV9fC?J>K7dS5qvi=j4c02smprSy7Ez;SVSR0Gd3$69f`c}cPzdZZh zGKu-V@~a<4_v1!bPvwA|ci59=Y`x$=$l$!bZ7A$Xqi29J*N(LzRs+OO6MhSSOq#E* z-8*+Zd)8vTK$T!BdM)->F+UNp$DM;~NT&+P_7|v;a6@BZe{K9@@s>E$tz4Uz zp{3hgciRJwUMqzvSgy6a&=Ec(FzBIabC4xrO_~5@JtSu;KZYDj8q6RBKEv@I!IQAU zD2a1mhZd5|r8+fG?O*Y&=PW(n&+p`UWQmi@=W4z`rF=`jELgXWd!PVVt@J9V;Uv^; z6SAFFc&G+d;a!siN^cnY95ITXS_TYqh8%*}c)^B-Fh?Ew_}ve~n;9v;aE&;sP#@m5 z8=6>fj@a(E!l&;+Kf#L?-bV8i^OqY+!!PL`z!tPrl!l+#zJ3{2pR&%#CdNt?AmH=J zWZJVDZPzRUOEq6xifYl%mNPGSzwoe_qnsyjbq9Mb#8Emsc6DXJryC}AaO*2t5}SyY zBB{Fx{;10Iq#D&vU+H{9=G|2YQG)!^tl2BMNHOze+8*;KPZFJ}8UfhaMe6n0P}oT; z;@!g`BJgm?%C)iD{A${^Ed85RZI=(*Wh>Uu=LPRRP&L5YG6wJyMmo-hfnXgE^eM_G z*r#khe9)F{MK=CsR~!3}dFu)hYRMb*7%j-l)R~!Ag7;Ciq+1qcwD(^A7)TGXo)i16 zwm*y)Fcw)$MQPVY@h2N+ypvMD*9{T#?2F?cvM8*R8sE61$9!$IM9Y!go_$29rsdlS+{ce0W1jqnp};SUcG zGg2R%e=Yp-kGy_24f4lxFSY-oUB4I-J&bM9>ccFST_y>Y5vN=}&hF|fR48SYTWO@s z-c`st5&VjL^*#ROUB?^O1s{Cs7L@9;oyPb=Sus6^AWHB7D^7JKRf430rzU+kVSPn> zDbe}a!QQ%V2&j+_79W)6;u|KIM7OX(m;`;c>%!NgLlG9*f#hk(Uw`sDTDv}!A_pW~B9a>d2{^v+ zp@9cW9zW@)h&8dn)%T8xJuTy}Z?u^F7|fefwHWSL<&RCX8KrZy69esN*`Q%bAjQ02 zs6ZS*JW@Xp{D$xib8;Jx3++04(Zgjt@qDY=(6=-j_JyFsIjTT1x$z?aG7Ond5hr3a zCK9_OXn7s5P$J#a4VI_|JQ_cAoZUBEwnqxh{{5>X*=(9Bm7nFUr{ckVeXb9!OWH^d zrf^|;EJA!cTnPt!+inBnu>^75!$F>8&#?Z}Hz_CmD_-fp!hBu7RjTQdaXF9&5_Abx ziQvMl)_`>pS`HORT#30qH{`@L#BYVf!+AmBZRZE#aq`(z*VpQM#xDI~r>@_15^)w; z82cFfmb6Ldz!Do)p@T@S!th&__i)&dA3MGDzA$*s*@-QnW_j{Ej5V{5 zHFpBUO(O%O{~NhQ=#zYp4JFb7#lzoDU1?1i&b!L${84Jz0;G%LR27C#|8%GXfu*Ov0N3Vg2@sxH>3)a5p1y$HV3vz`A3 zm%chs{;#(u_UXDAZy@&ve(8yV7G2{7!fz;LS- z7%n;^_0}wku?^mjlK?7vXJ_ZSr0a>cv4OwAcEjvOt1;0s>TJgyspTXOGfoSKjrqn{ zFJ}sPwR$d15ezdZr-8-Qmel2)Zj1!0mbeO^<| z%PJz(6%G_p?wh_xgA$EK;W$(TX&W>D7|!LN-G}AFf#+i0Ax)@_g*P97?9ZHDp}(oksT9ojpO3R&hxsOq zeA%!>j}cx&DQ@m^pZ#?1fKkb^2oYtx4^&kev&qldfT;4?lus%4{^wG@o$ zD_3vXeb)@pdsgeXn!mSnp;!3V2;bYKZ*~v^HNw!|#3~{M$6zQ3to7pfsMl8W6}*6w zV!+L|;U{+}XTHfE@?;x#k^h6YyT`+&Iv&x(5NXMnP_! zh9^?bk^%o>vI+knACO(ruIAxblD1~-j}@8t>#AqumYo>%tR%9???`367?8Aq z+2gGZcPD3%V7sv-x2fxg9VYm6;raP{bO|7)((hEu%!S6CIG@)_cCSX=ak(6t-mIe0 za2vD)>eAOP1%Tg_WS$Tz8ilw-5=_y}ejUYdPGQvVxusY|!GqyXW)U0BBI|^XFgklD zVL}+?PK!Yc0JVc&JPn|eSLh0~Cp7Y%>%6E!q*U;Sp@4`uPeu!A{j%3BO_5?rH}y#e zmmq^G7MH{;9z4(%U}`r|yTo`~qTjABNZ^(1LYnhWIfy=w{QLw{U2%7CE6Ic@B@M){ z7^(BawKBA>P$6(;c5@;nOg=sm!|cyjeql7T*+JsH33t8`>zAb8^_r9<(gYxP7rHA* zssi)CpSIpTYG0gGm3Woih}RYUzC#p4Uf5k`tKDBL*!ZQ-#d5 z9a|xo%3GH#qdEGcA9dLU=NhI)5ER|!Y^U}D0&5RSTom`J34V85GoCqKHa*e6cL{Pb z)4!Tl5TJh(IuNCU;u5v38J9^P2~FMeX_33S^QZy6zVPSf=dQ5*r#F-FMjB+BCqDQ+ zXb)Yci}vE(WT2sn`{m94_yxZj4KAg{VML==Y<4L0?1y08j-jaSbDHP_RosDgipPiA z?76Oq+qnL}N)t7+U`&2;^Fz=;P2|F2^&%K%w<$Gw%ZqjZLm6OBXWJntz$K`crUR*_ zq+bhsf6b%S4Xi=82E7esXsu^2`VTAorty#nUkZF=tL-0TPlwn}=d3RB1sje^kdDrQ zLQYC+wh00qxd>MOt3y(1NcE?#pND=7-*Er&XI<|W?A7tH+A_jgV>)v*_(ICBjw;vh zBcnpHnU>?0<(0Xm)~6@QzshMXmNf-ebZZc@k-x*Hx69sqNRod}6M7y?w5?6Teh5s#)pwl%0wNVG*X6yd+%}?=9hf>m*Jm3RR5Pcf_;u&!zHWlY zNg8>cdRH6Tr$agCokSzuUEKU?WW3}4dmH@SklS;+vQ(aleq1Ab023E>8rEM8H6bB( zJIUr&mAGfDygiKE+0gTK1!sp8Brld53t0^J{2d}Y0iG0n0M;h~pZN#zwpPGH6VbA6 zmgo7Ssy(hMzIr3*`YhXbFWr=9H_2E9my!suDh3ZAPK6qBP?rHhCRu2Z;ElZA?S@UOUO3 zY(5<~(N_x#Pww5qeU7w>#HtS7{hc{^>C14`Jl9>os|KOvEMK#_GM`6aM|kg= zB6wjTraWgME%P5lc|dv@?rRH3ChTbt8OmOxPU;7i8exxCIF?e)KZAqzMb3wqgBkQ5 zdh}7|)=GnNU%UUSJ1rkuh=X}zbrx|FnnD?jRb|hOPVo5uS|Nky5nJ97fxq5T50tEm z^A9xdJdH9pecd?vIV>zOc0f8(`31aohqc#M&>h&so5yR6y8K95ah*g4zO%fd2yV5qsumF=dOL(m4cf#=f@VV%^&a3y%J`L`C|1N=ch;CTYgs;XCdF<<+B1Jo@m4R1 z*vz`btzTOv(09zbV>2W+$%mnZK#-g2-{1V+vE@pO`0L<;>MLgj{Yztgdy2Wk9w+IX zR1M!U*H40oD(+pelnfauhNP9I2ud5URzsMdt-hg5wk+*fJd9E?5&;!y3{>gZD~1P? zS?`FJ>PqegUy>=MHV33l-LbHmbTu?M1t(_b#m7jkdJEZRacE^+s;*5a>Hx*ZY}Gs8G-@QBV>UQRgvX7?%$o^yBmq2SsEV zZ?3gm2)0@*9Mb|Xh=5q3!I(Uy^!Y>cx4-vHNJH>A$cZ~fM@63twTuQ;ETe-{K$)X` z`0T}@ZDiX|ZLvU(;n5mt z#{ycnb4BO(+g9;%NKiq(JmS=F=^B%4n0j&0TnU6v<%Zm#@QxX`&ekQ#V|9q+S)0e? z<0Ac-$VY{1b0t!5vZgm@qlHh2O@jWjFWv-=oxpmeKZe+tubJHDG1#&dtE2gj1iXS@ zfjIOWjGZ|p^}Lx&{*-|=L@B&@F@31|Tx9To9QYS_VBWJY2Oo!kk5Tz+h|Zo*OkSJV zJD`YcAq1Nc3!8+MxTYv?`SEO^#+~{FWv%O)@lMkqY6*IEzfayGqn`}=2^VmMV|EX~a#|Aj9}a+*#>^^i26a}ePhB;9$ei@( zwNmo?a5uPoDWBKkI76CdqJ-&lA!7l2J)1-z2u1=*vfb8q*ZQVBas5$DHl%_Q|5Cu& zZnlHBVvB637@7DkUPpcB^;?%s>nUm2tBY5=z85}cAWP(xow~jJ4{|}eqHH@iXF>Yc zyFcErC$)KK6|lfIe@ zvbos(3xAGX1P<)sh%Shy2zJyp!j3%BPzH##1R^3d^T7!aFM#QBORFdZyfy^6)SYua z^xLUl=^puU@awPP)Bq&Po`F4mHkw6eJ?YQl+5uHiYjtyXEsRJPk940=&Ld=JC^g}Y z-fd}}F7p3S_~a$BsKqSPQbySBg{i%T3{fCs;|rJ)Bj_ymbkDB&wP6O3i2gSH!@inu zo6+vJr^}K=LT~xxSh?%7LnKFK#o?*uW-%o~ngG+D;=BP{@z7ZA4?|Yeed5I48Dc`; zvOe){g+v1$Bj{>g?lXL7UikyYPDkP%4c z?^WAQ7I#xD#bUp{Aezr&k^Wq9@7))N#4cWbVy``YUQ!&w6m3ws0SCD$MCZV#;J8vd zJg`T|YAxRY6|CO8%uf=kz`HL(q2cu|Gx=FI(`Qr6gI*dt#tRN=HUxN3z;1=0HvvlS zyYyxn7vr0zLvY;N!jBpXncrsX`H749q{Q1r92IhB*%8UuDlfN*NN>L|jjV{~Nh^+F z@&RrAf@;~4BlX8aZH+%5(}(2%y@=q&_+g{Fq=^%QFrEnl0tQ5d;CL6dgNMXyG4Hm? zmO=L}nc5BNc(=zr6BB??e!=T}_6|u_(FH;B|L^r(J?whMY-W%txj++ z`R5LO<89X1{a~yjp!ci&P3mVhy9W;#j+7)j8!{*CT3T>*Z;;vSYp&iwKR@-cl4@|5 zvuCYD-Zi@1OcP+skmRT<7vXrX4Pp4=JgkW$I$~m4K&Rco9MWSL?>$w(+jUmB&nY{| zckbbi+Riz%6hHqmnY*jYf#wo+HnHeOsF~nZC{-Y*2hk{-r0=l0cY1Ni@Fb9oALD8o zP3H=4>C1U6QJQ^qjL&mO?5B_~s#_R!znhN%w6cxzFZcVfBeW;*wNg5!O5`L@pvE2bZhZbGpO$Pel1Vme@+8ae>ZoA;$Zfr)6;f(<(;g5%eZB49Ze z-MxUcp&mmn9EpKA*4Zl@Z?)NyQtbZK>RFE;2WnCCxi8ZUYcvw&9ObXEpYIoZ_GtmK zqoKQI7QExP*d6v28XCV;zisqgPdv-r?s4&C6_QRONSBxif5K)|iT|(`|-@i9rO+JhJ-Qj$`)IxvIQL%?PTGEaM`|Ka&mQfWb zhv{G@wN1D+Of`ZWw{T?#V&+ekZ{8CW&whUPY&*R7^Mu zI^8*QYw4Y+f$KZXNy|1ZN15G+5aa$7+E?PJa~lcTr_8;AusIeE%k``}yfU=R!4Zk^%is;qVeEH6GE7PsVkFifC;&3m2L?Qyw`PCfl-}qlj zE?mI%RN5|tfjtxqea_EIiFmL(U*qbg+;6)J?2cC=BLdF4yB0PvOYEQ0>~=c4bRbnT z`Tde1?Pf+&tziZ-GWf*QqPw+0h2n{H5uc|8CG4j>D9+=!hJO&MAovJFxD+>^=&~<# z8LB?$gH?T}lSoTX)0Yn|#Pap<9lYF?xU;Bo`OLhknEdz>Q=0c67=3JOaojjn?~!H^m{cv6lpKzL@!; zwY$}%K01H;Q{1U4eYL=dl7Z3{z+u1(mgV{7^`zO&$oqAkMrWJ4PcrnWb`1=R<*v}c zRO9_0V*?!KEuU$g*0&B_3lsr%`n9|+fT!crUynAK# zmKr}`jNpK!=&@`s0+2o4T_L~{A15SsID%93q1cecJT7IEud*{uIR52SUD4!^XPTN) zh;NVEpJ$YkGF2J=1Gk-_Y}=T+^uZO;a8_GVM}Zb#RdOXaC%#FU4PjCh{+RCWoc;CS z=#R9k(Bc#6&uSv6GPl0jUt*opElu{RVnfgQAs->v`As+a>mzJ*Ro78f2Id;d^Y5&( zv=dMKU_7OnIuQKEr$*IQ;x`GpyG03W**jZ`9$Fx`6Vk;1Xg`K`_W(VIA3@2H9~o+O zB%4^}C9GWh&h_U0SkQ^#E2Gg2(z$!+3z(kUhP+gH5-usE=>Opc%p*C>1JC*T!3e^% z-Ip2MJ?4!2djB_PrRzRYu|~Ebiu}uxuf4nf#tWG{Hrm6r48S2+1zG`G6g^LXgUiPg zH@!lL&(dtb!*>l-qg*?T_##BT8?8cpa$V6g(Cnn*5C0WE?UXa< z(AP_bDE?Wf>kRQA@yPNh%G<*4Yw?rP=2wh-*Uja+pBtZskPGL%-|#PFICES-+*jBL z1kNk(MG?~H5qQ?P5L|+j+oQK6Tf$sY6$*+MoW9Z9Bo~w2PVzp0Zni<*7 zbyN<5b%`x*Aq;Qcmj~jNa9R~wDjZan=lce_UnXhhm9)g`TL+1(kkuP`-hQxr=G&jF z=@70L|9zU__|b#1G+c?~_mmhTaxEW#r67G*8iYae#DOpsF?B$dB?JnX-?^o)P-Yxj zZNKJ{D>+f-YT~mY`cbeyT4Yq-amhk#*12Z_kT2^~PVo{V?X4q-+2P*qz|B&_1_I(F zNOozR)Ywz~ddu(W#&{@~%>D93FV#;rEDu5R+r;_;(WqOT^VP^79WiQRVO)H#y&Yy8 zCnUulswHiyce;&-MI{SB)dHVK3xB@PkUiuO%@P%dI*Z2wN_inC9@FS8b^T!t#{Od%2jG}NRKti9cPclde zso!1*%`Wq6g=d8_T1()+>^(3uuSP!x^9ydZA$x@bRF9v=4k^cIITu`L?QCoLFr_pY z8yw+Z?7`Vu)SB-2G|peDVq+k&8U35!Wu+9=f=BE{6j5)`+EKE1V9997@O#KGeCTo) zmb(eelA5ni_vaSd^(Ohn2b1&6pW@R0w^K1jjm0&ctgZ$-R)0l5;c#H>l|{ zTyMg%ZW&5-Hhr8{@6`Rf(SOjAOPD{K$@1Im#uaQ((jTU)VE!UeS*EGTn@AY-0vs?7 zMf%fu7nPrl;YA0C)-!){hI!7_IFCw>QE*xkUJt|em=kPC$8-+xZnm$&1`q;(CCC*( ztA7MAVh@47Ys7@uz|d;7-ZKP#hH{VI67QS5e1g`ym6_c)R++YAOj3Ak175lv7`Jqo zynxUhP4Gkg*8GIczCGvw`t)>lQ#v#}vzN=^Dy~IlAOflHFJu2!fA994n{Ooiefcxq zpamGZH>Gk}xD6t|v?uD5OKy*=U4onmdNY|ZvM9J$#a?zFT2XZF* z&QmcV$FQYOc{&FiK;?hCLN0{n3)^*0h9Xr%KQ?qWzJ6OU>!{AyT9x&T?tcfUeWz*obYTK|lX|Q#r3Q{~kbDG3iXab@!zWC(hu*9kh>|gam zfqn9>lf;j@6XX0~!jmdUAXB zzG+MHKUFFYF;DSWAljCx z;yR8ys%CySw&+vHYv83WMY+;CnxJ4JckvYNneFF{2A>kMGJCaTN>xVz8j>(y*cY`7z~=9B}282&--e2G!}lJ@k0{{cJ|OEeYN zTEq6u(`D(ybd_eHZZ&6g33wEdt<2+FW46XA&3wpqs#iPBSaXkWGYDWBkPw=t@)AVH zq}`@!k5Zj&U-X6Ei3}hg3r%uY$OJG+LyZ|~ofWHNs?ERQ?Ja%R{59xuooE_$LRPIP zt;p4BKyz{C%CRj~)h%i<0tPCfLE5F*gRb+$On{SINHohK3HKf+Nn%}ErV6utyk56| zc+Tdtllk?}ZH8W*S{G6Rl~;l637$tPV7563Z?Nr84&?H((4P;X__@)0S@oY5jNf&y z)b#$=458E!a033ppy|&9FZB!wS%bq{hdC3MTW*ILVM;wm{DXuiErul}*0c?FCTjaW zy~y`yt}HSfb%DEd8XmMB$pwj43!1AIR~m&rwtj5se|)e6GBrSCKSm1u6;U6%w6p@8 zTN_y5>ar!{;*#$@K#IE(9Qpep>kWhN4gA+XzKx71(BUo0RCRnA&yg|B`)zKkb{FLB zvg)FofGgQ;hyeqB8{l=Z`BN>D;8bY%T&dN&S9iO2y`8*g{)|PH6{BZXJ~&eaGX-J? zg(UbskQ}$;RKg3EeBHXeyDv{mhw_OXUCvLZXpCOiR2x+v!ha-meK$b=ra#gx%DVHq zwDqK|3@)=28;mHK*qp_^htBIQs!=#b8>LUxgg{8nUY6&ZGNRSIivfA4Nq0Cdf9rl%zEFRY_L|}U70vAcN2zW z>N%)2b*A4F;oiiSM1XTIYswYCfviWv>N}Fu2Ip|Mh$~O;pC+%@oNOo%(v~_AJvk~Z z z)s^3iz0P6#oMunY`(BWJG*vTJU)_eMzRtEFm_I1K%^WHTD~|ZnN+{rVUQwR}Ad!<~ zn5IqeWHh%0+)eMsaqWcdd@}hR0OIn@NMMK5q%ThLb^!E;MOZ-u%?t%MU zjrTZ(GMZ|Zi2PLpeL>7ru+W0e^Pog~k7{q`o=;>>w8K z6gTx^g)Met~!PgG|qD6n9!Vh;7@1}YT0(5nmBJR({j|#cJ z#0YG>L5G6tw`5|+I3f&RKprGt(u+HB8?DX+Dh`q$#%9na(fY`wKkok}UIHR%g%xZh z0#8w`0Y{RmE#o~bccWG3U4MCXQAPVjcb8sV&Y9NKf^!^cg3q7i{CW97<*;Q}rxvu^ zXkMZ6>-8c|qqHa{u03ljyx$OBMtGVnxEf|l%fToti1E_hB<&y1w4Y!ivKQR7C2vK; zuPz=)*f5Lsh}44Z%j#1c(W%;s&e23M2nT;2(B(=`phm7L8Y^mf#CNOCd$M24J%u;F z8{QJ}u+r@FHpgKa?-Lu{PJq*_t(Rj|cESrcVNS!{^*I(hbi8ypCt8f^WtPQA%#%P z2_>~$dmV!TvZ59Rei*Gx&qhjt?26P zcWH;F8&Ya2X&?gS7<$rq!_0%_d?5_o^1szJ2w6*ldbjDok&6M|HvJm=y@!V-Mk^Xm>jy;H5YWkxa#5KEU#qn`A0#x($7<;sj9))TnsDUo%YQyMCtm`nyW)dB)*9>}aU8437)b?hS@( zOOvf^<`u#plD}zRK20*cn z%pn3)baEIffVgEP&U=LewlI@?48xW+<`|cec7i&OJCi@Wq!Y-*29zSs&a4h z#Crx6QA@j-W1+)L^DE#~^4q}`+bx6+h@xKUkR>%Ata8^9Y@e(MC{h^+w(NdkZ5bar zh_uoQKLV53Ez`K=5aobxsWYCcYOQZxc9UyQ? z;CCl`P}ho=dUI6t9~53cmtcIH3woVK8BgB6$vmu!rh-uhIxzhKz6(_g#N>fIpE)-oz&!UWrFb_n1BwF=! zoh3s1BDs_YmQ~wGWE{&{X!S(tqTkpfD?Ug|^R?6GV+0too{Y%mLUxQ-`ONR#ebiF7 zXTq>_cvL%b0LYu~!70{g&<4%HHX3)b5t_Cq8)InbzjG^NWOYww8F-}G6)$A*!|_&pY1s6M>w4_p2IIV;j!IUgQXBe@KIaOwzJ`FFjh0;prXp#k6pgXBFUV)it|T zFt=M@a@3q*&>jAu%YohX*DFz55pZT3HvfM~Wkn*aUH%|st}rnGPw(PQUM&Mf2R>1E zM=z1m`_o0axPSdUw2HNBX{>WsNKPz$dDomH;{uEEb(S-lX*O)E^OtXY`F{A#c^yX; z^d~>3m4Xg0XgDpO3NJx_nOi;=m`p#X%TgH>wpuO3;QP*9`*Zfs=FJndigEEgQFGZh ze>eB6|3SF?Dbn;w1Va;-)|v@utzx^TEWU*nCWYo7#2&S~@9FC0#hE_04upJG zoYs;b4g3Rv8;`Kk|A52i#s{|Il`nz0)fK*Q3Vwi|gI5;$9?pqqT|kDmM`&^hX|y5l?8vV37E);{sWP>B}Od^gZ^lcS|S{ zl7Q|B;nP~N>UOyqs>}VqbjR)wL^Rk#;DZmXqZo?j>9ZIH%xW2oLtm#Bb)^+t5+gh~ z`?!{zzd}7j))YRi_`Nfwen4=E{2@9~CPrE;y6~1b+o~{xQu)-4VM$I}np7y>srLq4 zr+YD~{Cn1t^QRS}>&l{*J1rh}19mFBwrArm@T6jhmVv(=KOb=0eO@|3A~f;kOZo(o zYv$|8s@4|0C#P!C>Fm~&yJdTSC=jYP@LJ}pRrPl6y#@bIPn<`m_eNrN)3b$%xCWG1!MgU& zYq)P@C6*m1>zmvm`E#!33uNrJJiy86qFQb~<3egTkIBNI61bM;{ zAQ30*fP^(grnRu1z-P{xF3Y1q_-`S|U=!bzyC$LwyeG?Uzn!kX+0kOmgCAzyt`r*@ z5N+YTd|P$>ArzcVf6p3#u<7XN+kfPo$_-Fk{9PihYsWA$(J6aAc-j#z5S~5NvwUh= zBg*(1b9|1!;*ZY>SS9T*?JcYCti?Yp=W^6{o&;v;Hgq{iVro$MK6vfCEU+r&!Rx zbji^^qV=g_0O@^rFt}DC*DLW{BBscX$u_|~W~*kAVw4liD^vcVayXJs%&6Y4$eYM`EUn0+p8Wp2l34f6lg5X)$WwhWq4-zLYc@ z`;cv1qj^${4>|xN2H^G_NC$KdVWP4#&_Z%1e3(@qgL}0Rf8*k-^)<@oP0pk(EF&7N zf(4B2=RbCvE*M_TobQIvl%&r+ubp^9LcGKhAK32sQ$Qwhk3kcLF}wwj85H$b=AxR; z#mDHwqwvwbI@=Pze?beLK4CJhfDWQ6c_-85-Hcr{P}1qXusOT$M8j}bg5BTh9B ze}}C{3w~JH{p!y8D&L~Q?I@Suh1^{>L(B2T{k&E!KgPqi5U;^*B#0FZl}4BzgD!S( zBrXhQPLK2}Ujv&KtywUf%i#!{jx?sT9TS^G7Pr#S@y7ixyaq}#w zD{ZQ>9|{VPN_8^DsL5u_Ek1u!0hWsk`AZBIJJ~yCHc`wdQBp%cripcM1yC?7Ax_|1 z`kxt!QpJg7vFdwaUemVm_e1sK+dt-RYwztX5Y#9m%pU`6rd?jGW->vd*26`p@~)V$ zVZlEJ;1_?vmYhYBr3&o~VCqNdUv#UyM8K*s4y>wN5v;y<>8@@Iv&a%-zqh7Rl6MEmC-3J{t`e{Rg?9&gu^*I$#9o^H^R(rk-E|h9&*!U-Ovz zq1NYR{V}KSJ!*SC9k5n+3%6x0{@$QYWnCykFic2r3EXq@Um9eF`@^4L8T8K(dw&9% zbmb&3?k=ot%ap7W@%lByU|wLDq`PZ9;oIe*phHne`8k|5{UTK(j}X>l9oF;LtFsa~ z2MW(J2Pd4qtP36I>eWBba({lYDm8}HBt?P?)p-X@Du+NH2{Qoh!$EdwQST9t(>kEU zAK{-a~ymOzXGmhv{yFhWW za`E~#+S|q268{j_ktAPTd-}4&H#H+(=^0^guN3KEFBrbZL=;}z05xVQ3qyTSagS{; z*-1r1MwA{gKfkoL{5zT^rB$y{b)16?>GH^08vJTr_a^%)A3U&e&$LQLS}&umwp!Sb zyUu;SDN@hsX0R^XMyt!qJE7u&c?UdDqlIX=H0G8qpD&D`Ziwko4-9Ev}=Oxji)7H6JspCr52vOA{EmFBNx6Ci8(_re)dFiDh_tc_B0jzPhxb44&g{K+Yw5 zpr*V?f0fG%U;rS@C?xpBwM8y~wHzeH6vtZKb^9~vTn37JoG=}bvE<~PS{1~bOk=chH5ST^u@KB z*Q|@T=haYG(INny2~aBjJD_GGoTJ^Fa_GcJK*^-FPR&IwveVO39L$C0)#0w2Y|XYe z1-aCoE}rQMc8olc)n*wV527yH`P(X=W`K412;($M%iN=2Loiw%bJ`^vO&aeS`2*Zt zT-{C6PrtA3{;bgSx*XOO4L1gw!DFC8GaWTh#&(>=;*05WBkI&aU1PFfYDFyvOJjAo zh4NCCzWnI!*Jo^w4fe`{_nAMmKRa4k>H!@rAs|^$fhO(Pfr>?ob|;?98tTSj&FH)O@Cu_c@8h!z4GBAoi#uX*zgCmZ z?XXGWlWvK?XqAU=k^YGssge2Idj9xSM#vKn?`lgx3f2nd6RtEa$*Hu0GP$dIm+I0l zJY|`nmY=+KjZGoJIgw3h6?w#(D&&2YnFtzV5xnV z|8XP0CwNNjs#ovqvRWs_#bu5;`gl0%z-lFGh2*qC-~DP%tc9Ti!Ex69&;ERpnM!1c(l=!Mbjmqw zu?GgCydmYfA(; zE@eSLk1Eg(V0-!eb5VrU{dTUKt_Z65{0i5r*?976zNsJeZyXTfP%P(s%`_OX{bs|{ zq#-k1d&HW$qEO(k%_J*s4HKpaj9=Xd!1X;3$Rmo&F_FFyWw(hW3_!t9*u@X$@}k|r zX5RG^En800J$^RX*L=L@AX$0mV+A{FoOaHlYhNxY1`*l_qc_up+yE1*H1~stzR1I4 znVZLcW?h+m{=3R~w)o)d=|Jl$xvbl$rVr2nlPoMJ-hs6260e}yRsqB4(A$CicL^7b ztDEW@o?}BnFP8wc!7-m4Y@dGkrD$`sS(*3yI)d&aP4z1wc2cy_#`&+VuRUQ5V2dMN z&Yv#qzpG4bBZRgGa;MPT3Vzm&0@{>fS3n&8&O+|byN25i9M!79 zJY9$5#Qk{N(W0?sg>Pdmj>LNReTF}6RBQS;1_mZI9W#UcoF9i_fnHfaF%kGwvC7nD ziw|B&Q^!L6E$VAtis&{*Z^wRgzxBj)`oUKQ#)dXXwvEP*!l!PzksT=C#xSx_USEd@ za2dt)EI$|ryf>0`3tm7!{&jcy1ogs?vzphfhV-Bz+#>to#Zh%}WEZ(VOH(t;QupPp z8u%T>lX+7NXS zDM{b=8`^DB?Ppu{?{JgwpAK14**Z$LB;3i5z^gF)%u^@Vy(xXQ3F>h#PNCxV*8>*r zvh`nazNyqE%K2FHOaKlzKee)PNZ(XQ4siMsuq+D}<)PMd6px0nhYL6SmQ$u&uI}uL zSZuqs@4ay@%0Z}_j*`q7vi;*pY|h=uD9DHAu%V6epm8fV*rs)zw>i+$y

Wi2kB9vvK-Kn(WvvTD}Y_TktzT ziFUB2mCX%?crd!Qe|C}H>*4ehGh%#P z)Z}^?jbCD39imTltPAwiL`GgQpBK2XX^&|~)vI*k{%Hf>W*><4w5s}0MBu$DEWQ}& zn5(4Lf^;s+UMInjY(AGkPu7Jg?;tUfY3lJ6Nv3Byl@rqiDf9VCJOVGO*j3Z5E&iql zI_^9aH`sHGru&LoRBjtxz2`3AeS_`2lavCyssKKKoA#t&rdk;=KaeEr%A=vHy_Q&k z(2%8&qskoj$Goc5LtFyOdN`EQ!Me0eNeiO=j z#&?2qu}DRDzp`d8Yfu}XS^-0Y!V#9Y0AnK2{|Inq2bNUeKJLoR5EGHAa@?m$<`th+ z#CvZR363AWyldZI@?Llb-zm(SHV~i&U?!1OW;A}jj$@mD^;e6@8HB^$M43!Hjsx<& zJaziOH?e+k#8@M@=+g&bhf&zR!z5N10J9bVb z2$;Q2iJj)G0;~c{t=WZ8w((&m*eG7N9oI7=4)dY)NH&ij;*NQr`wy19y-$s>wmE`A zsbu;F28nO+^;UfyY58Jx@}^Szj=gC_Mpv+b7&J$AqQpS)AgG)K@{KXb!6lWbbGSGa zJzwRtBIi(#y9CR}V<+=EXW_N;rXJn{w(COAqL*GMVd`v{ow9xzt6Qj_Ep;+u$c)-J zqHpy6Ve;|e^Pdt%dnadmhq$HU7Eh&U_Xhqdi)51qHqoyyl$z0kA3);qR7TOO9`(P_ zJfy!H$kTrEiW}$DYGTBNsvBiLUi~|%)#F;8Iuk?ZV4<3BwCW$e)fk?swt`-H6!_4O zPg!Ra4VOS=L!*6LV{=MFg`Q{ols*)so)CBLp8J~rd}?!|;(2yuEq~SU=*I<`%2>1I zy)<7<&78+_ohEUs0+~$QMzG~uVIypWz8;&$b|9O(g8{&;@IzNN0R+kx<|H&gb8pH| zP*3zTSnEELll6*{>@qntD%X9bTIoZE^3NarR~{7bJE{Qj79A8&0RQ zDA#L-oBsR{On=9DJ6xYVq9ZH>?8B*UTr7^c%@;AN*e$_-SgdDBMbNI+PcSc@B=seJ z*b2gO)6P5Aq;CB!&U$(O!`P{>cTP_JdQ?z&=S;wtgFg=w%@1T+kJk}?UEtAZPexA~ zKH#rAYq($5)b{vcDzCfCJZ?QK=&&clzwTegNqFE!QhTNWk)MHxB3<%-x~KqnQD8J# zvRQJo)D651zi*yI^aj=V^JmQ~lNZ)*B+TzIHZealePkrFII+{cdyU=b*XtDTQw==7 z2Ky-m+R>o7&356FDu|(n=|?LPe@uP><5?>`g33f^3kp()V~?V!AX9Z*s$1 zx%Ih%u45Oyu}?9c%DRWMl@Mxqcy)LQ)%gu2;5t|Vr8*c$kb-k%vqLZ^`L4N{!~CnA z&ztY<9Fa;Ylf3OM&L16YpoSR@$v@VtavM1R@EL69DH74_#qfIyCFnZlgT!41m`R6NI+FyI6j(KlRaGa zE5xica&QyR_pU<0Kh(lqW#z}8a<%CZs1GgqW?k8SV9E zLNEQS@R>!cQ6f}$e{l93ZHHVL71S2DDAV)>*;$}08EV}T&IMxCx4OO4AXIq%N+ z(%CJS5|KygV+i=eRL;#yd6&Xs_gQzXujujlV}%4YFTBnRE{pm8uDSGP^`#PM|F8wQ znv&sOHA8FltPoRnd<8&1F`Xs9K;nbNy;iIgSvleGEp!0v;(<^OqHH{<)vn&{5jyc{ z+;-5@yUe=jWA=TxjSL;9T)K0`V;7Z=0MMd5ILgz*%vl2V7+QN$!|EhwM%hn}!*b70 zSW%&~WR>RknF~<_F;?V4iKBdQRw@~{5*Q=i)fk7`y<9Yp%`x~z)BH2Ng!Qz64h?z^;P8@I8m_IFecU?Dw+o}8?I!9pU* z^rg?oCQLPP^K$ERi6be!6mz1%_b%+@2~>rU@Md3QB@1{HvoRg52=M*?)fENfDgelQmH1U&7y`h znJbXy*!%PNq^bn4OK+(NfaqW!0#ZV3P56<)u{NB%8$(wFKA*)nVFImExPjuX)jlu^ zB@gnZ(a28px2Yy#n^1d~97`H+-4{|1O#2Kq-bo$b9DG81b&-Pm4nL zy6ECDu*db}i7Jqs8BGvQ34^|ns7saI za+u?^Q=f8hE@Z8zHAkCun71VxulrpeXLb@~e|NR_igFP-(h7zt4%O;TgQhw~+^Yrh zic8*=dkYX;HjY>NhTM}jT#}7#_bR%!loqLA#FplAsmDey;9?2Kj)Lx&pw_x{nGR>$ z(!V?jFk1Lw2^ODRAE z6@CVJw+tLB_Ji##HC~f>`^!)JzKFghf2}^8M5Wvr$KJDnl=|Y3KE4QmiqV1PHV_Oo z+SfOrcDr2qYw(7sbkVWp$v1)6e#<8nZ=-2Zr+>T&HenZjWD6e^uqw>+K5MjT{#_fP zTA7Cge7$wnAcq$uIquK|NJN5DcO_!uZ^Sr8Y^+&v$#UV2+{Xz=k93M_XpU}Z=CWzm zvmu^NMFDLX2!;}5w>_x3AdFUu^&|fdF zyOx@@dLn_!K|eA_(AC9Go3*#{ppKv~kuZ(;Ly1Xo3|ttpry4gljjvtFqlC_GXt<`G z8k;vC<}=_(o<6PiXOFzsyLk362LREzVpFjk25gv-ML81u$$l*G=($r+xSdOPtVhT10@h4PZrW%}#_02hl!8`m$y+0AphlVG zoR$imLi~$U-PA1k5B651RT8|>1~mV};_Ki-1FcFb>8OmF)T*~%Z_2F8B>EqNnmVV< zqj2_L0MD`D&W4Una zA||PiX{X-%-nboP{`=ZDUyi=Jg@c@WllDuf>o<$IIp|z-WI!hdhh!@wTX{fm1|+MX z7@)a!iqke0{*)$Ix~}=4aKqtiGRt;12lEH}RkfA)J?+4HpR5AzUHXv@uy$3{fvz1f z-?Q^Xny2`>>FVSw1wpC28)=G4%^BNAp3TKEG`}AgQ=@$!+K%Pq!L+e6@lC0ZgsAo? z-?wS#fuViD_RRhXr2A{2ABrOh4&d(+4wv;d{F4%Y8m|+c6)xLB$?u;6QJszsjNUjJ z?B~o>D2DW^vy}lVCANt`YnN(8&=P{AD<$Q#rf>hsc<8S*b1`2!eiWWHyz1dS7stE{ zk`G$t+=r0lu$g;o8hhZxQjO3Bvd$A)PyD1{cmWEXeqe8_ z&_s=uETFNEt*jj=YRVZh9~+`dTi2~OnfzH%XbDJHHALj%NQS6XETS1K=gcXjomf%^ zUR~mBW~~xxOXAcd+U>@>b)HMVtpk-l1%C-jP7yvYvt8nwwAu>dQPC>c4tW(UJ|`cp z=T{!u)t!~63{{X(4sNKPu;lc!Gg5_@esuXaI=(c%Hm|GWIDd;;AXj+`*cEqZMwMuZ z!+3T7m^>ZGmmJcLQTq|ZWqp|Xq#0i3Jsb3$cX+kjEaEgH*K~%2>$ygSX?s@R{rZnq zFx}n8%9kKvTM=ll%KxpsFaKGQio{_^X}J5<7x=Uw!7Os$h8cZ{Je5E{3HeYjj@|`H zb6mzQCg~1LkkHq1Z2 zZ}pw-p6;}eKF}ZmfL0WxE`#C(=FA5~5>aFoeEs%`g3!WFJKU`M9LS}5JGJY#WAD28 z^haUn>3IRwp@O(sV^*Qwqq}V&`qu|Iko|$B7hF|S4N@7imrB~UMsMfiizl4W%!{8&PLd@L0{h)Zr62LYH146iN@sR&uR%26@ z50Nf^aCG)u@->?N|5hA6DCYktjy_PF{C|qG(g+l1#EN2`>o@i=aqp{L7*YCWq(K09 zj851U?mSK+-NU7io;84a`wh2=%E%`frp~I_9fGL@P~=Ws*rzx)q!*4m^p)|%0Y|F5 z0WTCsQG8$W;%iE}>r4XzUCLFn5^=_N8f3{~XG=hd>~<^|YAD&gq7(g}AmKL6>WHlVCPvTul7$?#mVVe@gB zzv=ApE2^QPHi}RW>lfucSIy%0rpi!km?j$B2%ewDl7V?L%H5XgYmV3sJobs1YxW z(%8td1lcDv%fgN0A_mz@a1QrP>)))5Pbt}TPe&*9M@xoJFh|ZYWu9#AqaVqnUf7h- z0!kr{)+UE^!4pwj&7P#G&a88%_T3c#NMPpe(5$pRpY%DmVcs73AoICz^U}ip&tsJg zC3yZtQBITAXaaBw0%XDjlyF4ocK^lpA(+<}6@ECRQaF zux+TWCEaNg1xM2PkhFgS)>8#iM0M;Yp;&9S-ROd}OQ6b&T8a26at*H3ni0WTDBj+h zF3o(Zn&))fd06cC!UrEZd?|+V+5j`+f2`WRR7hLX>K+18Ox==L6p6psNM?kdrsN(k zQGYfw5O7#n^2b*%t+2_|RWzaem|A3*)0P2i7B2EbXZx<|~f6vn(A=^PT zn4<@Pneh=$gb;M8h`8DIxF9#vnPpea^aYb%-bqs@)I#g>?6O!SyifxtfEmq3kesnI z!!}UWJCKsE$E2J_@P3#N8P_U)nZCDc62EJ*xnRP_cH!4|AE~+cXK^0S^51S54hZ}* z>w!ZGZSfD`%nO@gTJ5{DsGhtNV->~O|GX_-0>`6wuitP8<%MgcNcOQ0H{kiY9%{DX z6X}v*2TGk*GbgL`zJl|6BY4n`mA+xEh3_^g^~bZre7=>=3te{&e+b@2CjAg&yN{ETq>6hI!Jem@h62EEWHy={OoRI-+(`4|n=RPml=A**b z=n3lie;tdbHs6`+`;z17+&tvAM4+t5StKPK!EJC9&b}PmjHrzN@e$Ds@nfH|ssrr96hP@Fz73e7zWnI13lnDu4{7qAq@3 z(Dvwp-y;zn)QcPA{CCAvJKEXq9KGqK6e(})kreb1MHK8wL`1v8TF4T`C87CFwV&m< zO5$ro2mNhdRI@PZT51=){39i;%zvv2#%Bl|Pty5ZW1>mwwhh%E+xcb`Be?M(|B38& zNzdQfgC{9Abv~_5&y|$kEtdR#RWHyhC_K;y_=`yATfzxiPXQ*aO9NDwW<(aGAICkB zn#4yXy1ERtd9o_r&7j5#@%(!5%G&w?2E_fMMbPeK|1E#2IcBVBxEn-_YD(hwZ4z5$ zQc)ux3nAwU_nR7xZGmFzEY)WX<{He~rQ(^BI1hQK!E_vo$3QLg%aUi8e-x7SKbTjt z$Z!wtn7VVbH-VZO?=l>}y}zKUQZak~PR^nh>WtjpZNK*?ECil%2S;L_|6i6~r3OfE z^nQZ9je1;fAd6+C8IiX~0$K7B@i=Zb->}%_Mc1a%lm*cN<4-Z_#UA#TkAIVsL`Y&r z5p}~IjY#|l-tywMMyM=&G z5+!8Gtd&F?L9ij(;RnskKQ}csxA;X*?hpjx98;#9JFDUs zc@a&FuhDP*E#w)!iy*~1x*E*^Es<>S0Eu;1G_-B$Ve&huW8WX<^Vxeg>$R}+jpzeE zTImeFx$SZD?eTlIt}8t-%r};SK?cK{VXF6%&v0>U)?|a& zptnB3A4)hs&(6PX!pFqH%DEk19P|D7Nbgg3^nG2<{I8Kn;kLWLfD3)qfh7guy12|b z7Wr&MHc4pR0RO_)a)}qp#?BjMYujhb75wZ6`*r404xE-&4%=vAz;Zy4jjN!a0yP@zwDx?JR$%*~V5x`yh z0l<;10_duts@twtf85=F6@?}Eoin*?bFS-p?#c!1$*a!Hae|WDXBZ3-Wp_j5n3qSz z%to7aDME+UD~F#kVkYl5aGZ2`ngUrPvqq)-XR9iymXa>{vWu~g5|Oj9aTX1F)|(HC-# zdlk1+ycuI}{V~Pb1B0Vzk$oH^jqT8F)$>*ZEImkg#{3;ae!QNHUc94;TK<~(jI)SjoVnW zg~$bFH8X=(EbW$A$xef@lusGj)Va%yHjos8CV=a}ksSObLVL%qK|%hcIZ7m{?iE-5 zlWiBVLF^6Yox*P~f84noM<5tZ{z~eeV&bGur;eE1Gw;DX0^(vlk^_6dd*tY!l3zPi zvZUm(^gE7*Ep~+hJ7<-veeNFknF^$Q+dh{$@kV!BZ{RfFE&H;5cHnS7o*!h$NX6X7SN}EuqU4WGl6c9KACUHvlqbty<6)MY&t9+YQZO4 zcU^3(3wOQ1wEemLWD1xBuJp56e}Z?UBN+%-(t{Ro_T$?Ym(LjAldQ6&5tpSGAzbBu zbR#Twq04GT?CF(_*;7aB^-5*6i2~FdZo|sHd`O3Y5~Ba0;5cY-(%KTDw^G7q5k|f~ zr4e2s*zQs>^djwuppS8w675x-zuJdO8^z97d`?0;{>XlSP4esz%Yvp(| zI<8F)i(lu!@ne2qKj-C=!k5-MaY-0m^6kom(?R8>`znk+*-dUOpIzo-5`S)gKile= zbT|2R>Aa}7y&NNMyNd{)ZL@~pRDI)euQv95Tx>l?k95gEvY9kgg$*e`4e+wDcbu~b zi~Q#9*y{LcxB9!NrmOiA8sfuY00~Tx#^2RtL1OEqv`Rv%)QfXIS?8?M7uNY`!eo}N zt^iK2;CNTfrwRR$^Ic}q7s~hA;I|Z=S?P z8?R(9VjSu@xnSrh0!olRK?hD~jGwv_$vu%&hGbOf@=B-E$DG=*yvA=|R0Sv)POdYW zpV-q%Q}naydC=}b328r@O_E7`oqqSabOvB|n`}d3@!9!x3nd*$NY70W%xdNPXc7;aYPG5w7bpxn9qy>YTAFpp6 z>=$HAjaH*Q3FTNu$kKlDBYu1bAVE7TuO?cLt&Jl>hol}oejT7Um=F-9g7K|~~~!y)Z2AR$?xuU%!N~iZI zKdj8Wf7@IvN#vgtjpAMmmdk;YFRsQN6hepIe|G0b7|#rG$?N?eA!gKJT1LYsBehi2 z%V0V}gy&j5FyEMza}xV6WTR(F)Q)MdTjX|WuNyMa8inwVEox4F%GZgj?u0VGh2b0K zC24766)?ZW^{S1@cZ9jp<@aj>Dfn)4|7KPNUoOX*nGRFk@R)v$s&EhZlM3c%+s^UQ zo|Slm8iq)n<^5t|DQWv*&a97iDsbc7KXl5etx>aN#7bv zKHEEEhr?j&vXR$%YU7OWo7~*M9H0L{_DrHR3{)h#Htc%fL`#L%^L(uO{O^viaVdT2 zkeOl$*pPqBySP|8=kEOjWl}qSukYH;#s!BihuIyk-et-_0cT62_I@!Fe*$~PGvlz3 zV(mGgdWbx1ZDj&Ff*eTakV15dBZ;`gR!->TFn*yek^DAS-(c!}t`A{k$G2Y^p$@V? z|ASrD-wOwP*qdbd5|+jv-h*U0P)RnU#6pj5fKygF?_9DE;cJHmz|m0p8Flxg@@Yqp z?4iW(&!j3}XfiUsKh=z+v+EK7r&%YK=zgi16Dukj(wi!71%yAC&tAU$)- zluJl4{&SMJ+Go|JH-^1qL@ES%>h^6MKDesj%!qfi6N#NJyELV5EfQZTU7bxawN)aQ z0($P3ax9+er7ZBSeUtWNXopc9{s;213C(}_Ve3H2xa04~FIw>r5nSI05gy%IO?s3Z zU%+cvft~B3OhF*d#3FQ`)ttx`}T zF_kCOpNj1I>oWij_M!`_zeE#OS1J09HI`TLp$o+vzX4Vtyt-8v+hK(fr*T2`<&f~h z{3mp9G92f(y{l(@8$Wym<%=Vw7=e1N^gr7~`Q!Wcgfo|7F@7&3Co1{Cf^6N&0eMg^ zk=ANJWsV+&7j^d>M0a5FQUvVNwz_L! z`P*IOwa&$J5D~dGlqWX1efQxoSEzAkomo#lkKbU?O%C#-K;MA>mEorA_4|J^+|auK z6`TGIkK1}hJ%%X?vj!GQoJuQT1Fb?M= zIFw)!Q=~pR`z5S53R3vpg=2zlZuUut{E#9Yug4NlY*2pJZxNX{exVuDubfNjnv!pI zq&Nqk<4w*IK4q;RPiLo+GrIo=kcZEV$d;@pT0ZY(AV;D+o&qo=jO7--Dz8L#Z1JVq zH+9%5&};a%*~E54EsiEwzLZx%8lunjgqdcND&5CQ$eQ`07A2@5mD9_bpWHm|uCio0 zQ;uKOGUWSmUjhU`SqP-@EMRe(siana>#*Uh>O<}=D!@FS04w@E7LN4nApP!G)ciuW zE+O;v-Tr%{}V!Zf?`53&ml>^&x)j%9ji^opI{^N)h zq6rYH-nc=D79-7$`0#BNTe5y;S=`H-hFqK3BHJJI6`NPooZS^1HLj}D880L>G-=jH=oq~HnRT7Idi=z>S|ln z?`@k>Z!pbvso>ZSjuHE3i)<$X?&hkAwQ-s8eM`mRV>GpzgyDxy_M2H=jas~qg155S zKhE?Xy;0wRHYUSOkR&tz6fE~E+XN)Q=Ak6UxnWJ7opl3QZqSfc-J@eszrI#AUJ+*V z?jS@OP@sPqlvgJXKm`PVCyx4C5{veSN7EFS%EP}XcHuYsCueumf`4l{2|m8-DsR7V zWzpktCjIhFYsEQ`&=>uTrjkfuM%Gj{gUo`ZE4k9XSZ^xRVf9uoN)hQvovY8w+%s8A ztjT8q4mY{8i`~HAGy^Ncf z06RPZLNf0+l|TcBfTEt@g?|z*zJ_8PPXe5=T2#T9-^fh^)IG8&_$TEpRU{(-U+lvJ-kSSRgcJXtM-d!feLT+Jm$!KlKiK7Qc z?+qv)th80dcTylF5G6fBdg}H-1u~^6y<2~)bekAqI&b19a^&MB=cKQZ%zlqQUdE>- z?EmNln4;$kk;ys@h**WFnzhCvreHIGnR|pyFUtHiOsCMhV>P+GbkmS+aVWqvpid@S zH@o&ES8H4(%nUsVz{TSKg2gg^b}hDc(0(+`C5GXy@(=&Pru$0&gZ;8%q94(qDk1)q zIH4fB4J<7Kb>s1H3rkF}u)sRt;#)=R^C4;MsMSaL(=S5~nT-E~F{J)MNaRD3eOvW; z24dh;ZprL_T4MX3mMBm_5U9+N059o)2IZ)JpO2%iX?PB@ZV&$VXg-}qWe>h}=?gvT z4IpUlqot;@(T;giT#0n0i7DiP>OTR@XxiWtuK!@t8*2#4|9phHQ8jG?8xJsqM+VHKFi7>eDyc`fBGdR;%C%Eli#6LZ3;p zvFd|-mhoG&cT&9tl8ba_Xzy#jmnx`p=gWX3y@XL&D+^-NrzU0#tP)H(eK zJJAKd_=}h9Y*Az_+w5vD88-RFJ9`j z$`lF6L_KxZA$z}RQ?R2c(Sbg1do;t=RZOa%LFwB?% z(2MiNi;nAu7BhP;Mv5@3{T)%Ae|_k^_Gj$AU`QBDbXEL}SwRtd`(9E`06Hq=T~3H+ z#`p+hr9&k5xzV(Y*9kTR820Z^r2G&I?1Wu@@9TMZXQtt7;QT1V7b$>%%*>Q$SY!tih^beONxFZ0a@7dzB=HWsiZ;o&Z@Sz?P$N@pnRvo&FT6PV}&_(j9}s*{<| z8<-#NT71(IZyI4LlssrJ(o2BrUBC6CX1fQ*j3*}93o7oHoV!tabUg?5|9%><|Gzv8 zd9ffwu8f7)Yg=mq*NM9tFma*POmpcxZ`XSaVyig@zxiSG^2M}QALWJeOZXG_{zbhx zl2WwaJdfHn-MSu9e6MI3pgA+)9}dEPodr(mMU!QW`iBUM0w%qKO{04O>VEgYvZ9=q~A;Z`G z0#(^Gqt(m@jcYmw1q>-RcWupCqXzUI7TuB_0YKcFrsp0O8HSHlrFb!XFxnl21q=|f z8B$DG8hY9IVGIo(e@0kw7q9Ku7+dMR286i>I{Gd0rb53F?)%*O@~ln(-SCL^Kp(#c z$LMqPw0cP-z=oCymDm>p^sj?#IWQ*PNBkcigx`$)y7qc)@?XaVuxDH+#BC}#ALm?P zYk?_){%^uD-qU^RFE}5_F~Eo!J#wOP$lU*tpm9DgF7g;x>+_(0{laA>o-cvILnhAf z)XFd~qO6#G(nlNCsJ-hWFl})zCgI@Lv}MUjQX1p6@E{}sCj#{s5~GS z_dL@e(`ui!7lzyH;t}QmLk^iyR@hL4LWHhugy!}B?i8ZlWf*ONRsI8qs#K)D98*b_ zE|zDO_1fw7TSUf2>b8NELBz+3s1ag?O#UO8{4<#re?2Q628Y}a&6?vZ`wX|EN60a~ zc-XI=)L=Kf;-nFA(n^`QI$(oxDP#SSF&Zf~%ky-ZY$F>rs7?wT7pX2@f8bNkV0PqO z4lHGHX7@ISknH%B!&@(&^#V1n-Q6LD2&84aWulj(^0_L3+zKf7`uF>uA-|h!S>sA7 zOiDVt&!|j$o@|V;an7(qsvUzHayd0*u4M$i&KOvI(T;fzfL{#Yq?kyHmGi^^wBTl0v1lR8 z!|P|SFGbOObAHflCkH>@{k%UQo%7RAIsMm}e#`;Pp@phwpJ>nN`W)77^v zRo)AK?7{pzSqS0}%x5@tV+_SkAPsbc4vKHlHs6gmpU(DiIV~?saZI+VuVv? zc&4uFTH&tPTNCa2)3%R)^^`Y&8#E?@lA$ZTukcrQx;wp2@VN2bv5Mil1q+EbxW}rq zrC2cbQ10$CuCgKIlN6tg=ea1{Ung*|+~>n~Mf>-I!qBK3V}b$}u}rS}m(gNG?;F%kbp zXRKyh%~H^6)dnP(;rCwUzobMp94wD(waV1j<(NkCea~Qct)i0hQtG23`wrc;(ICWQ z%j$y1LjR`WaSf#_R;IT-*@@jlx2M5xW(h^7LwvjTJ~%{ zFHBuqbeU74Bc-wyWaQj*X2Rp zyf@d9+d)lL1A=Q%ekjoJ=of!PEL#$TMT2LaJoQ2(J_mL~hqgX5%PsddQUuOC`ZhW) zL5^YikDQdyUpo_+M);xEi#-#57!UQ$#lmHNK!7auJ7zI61pzMEyB-a^1$V1v8H}^) zTJhU586!3?mIMca`^)Z|Z(FAsqKf%#8KroW_luZdB60;ReL6)RQQ+%UogR(GzDe-o zZV$x8ODoCXwzVXO;OM}`1UVQoIAs&u)UcXnZNhI0>&oxVY$ZvI+ZnP*B2?c3)d%8= zRq)#~yuSXsSV{)8Qna>L;_0OgzVp)KyO_WZ`eO4~E=clN2klJk9=!D!1Pz((#44md z1H)2JU4(~S(ME$yMXoi_Kml{_RjRGfwmKLfC$>T%cLvEMT-R*7zsa5H6rAo}>tQwJh*sjyzSX*`vc@as{x%SzRC^BQeKB zhu_YSi5n|&h1wntdgPBp_kux6d1)6ssIfC-4TxEMo|FSbxc9OsSQ=cgv7T-o^NUe}x1uemr6ExXDAM9E@#$1J9z$~&QEh=`YE^-A7eFNr&jNqOQBZo`= z&##AoS%h^zT%v#YC+xo#z%cT>K?`yPOxKG*3!vdMv)aX=1@O&*(7~I&0q{}68F|nG zxaj?3$`%G%0J6frwk7L~$o2nwoTF!%LIxT7*}w={j51|gU?6*)feojwwFf#h)xNHK z`Pui$Dakh-U})kIrSy?%#_M7{*Xke^&S!h;z1AAa`6dAk8iA6{CwwWtQg3 zqu=1y{bZR!xv#99VK*BOgfmmaz#I}4t=u1n|1g0m>-^Kv=|5_A@iOi$@83p>=Xncx z$^y}u)QrU!zyp;Ih^5IF?i^YHjcQmxG7d;8Jo#6@X@mJmA6rzu&jdBf5dTm8ODayPo(`M_606u+-S!Q8 zUYB+zxsDF*7h4$oHecuV*+;bpbk`h>dCrsuevouuZax^?u6#CflM&lQ Define data, use random, +##-- or do help(data=index) for the standard data sets. + +## The function is currently defined as +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = .attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c()); + eval; + } +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd new file mode 100644 index 000000000..67c2c7b28 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd @@ -0,0 +1,55 @@ +\name{gsa.read.gatkreport} +\alias{gsa.read.gatkreport} +\title{ +gsa.read.gatkreport +} +\description{ +Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list. +} +\usage{ +gsa.read.gatkreport(filename) +} +\arguments{ + \item{filename}{ +The path to the GATKReport file. +} +} +\details{ +The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accomodates multiple tables and is still R-loadable - through this function. + +The file format looks like this: +\preformatted{##:GATKReport.v0.1 TableName : The description of the table +col1 col2 col3 +0 0.007451835696110506 25.474613284804366 +1 0.002362777171937477 29.844949954504095 +2 9.087604507451836E-4 32.87590975254731 +3 5.452562704471102E-4 34.498999090081895 +4 9.087604507451836E-4 35.14831665150137 +} + +} +\value{ +Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.v1", "TableName.v2", ..., "TableName.vN". +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +report = gsa.read.gatkreport("/path/to/my/output.gatkreport"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd new file mode 100644 index 000000000..0a8b37843 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd @@ -0,0 +1,48 @@ +\name{gsa.read.squidmetrics} +\alias{gsa.read.squidmetrics} +\title{ +gsa.read.squidmetrics +} +\description{ +Reads metrics for a specified SQUID project into a dataframe. +} +\usage{ +gsa.read.squidmetrics("C315") +} +\arguments{ + \item{project}{ +The project for which metrics should be obtained. +} + \item{bylane}{ +If TRUE, obtains per-lane metrics rather than the default per-sample metrics. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +Returns a data frame with samples (or lanes) as the row and the metric as the column. +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +This method will only work within the Broad Institute internal network. +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Obtain metrics for project C315. +d = gsa.read.squidmetrics("C315"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/gsalib/man/gsa.read.vcf.Rd new file mode 100644 index 000000000..cffd35e8f --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.vcf.Rd @@ -0,0 +1,53 @@ +\name{gsa.read.vcf} +\alias{gsa.read.vcf} +\title{ +gsa.read.vcf +} +\description{ +Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field. +} +\usage{ +gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) +} +\arguments{ + \item{vcffile}{ +The path to the vcf file. +} + \item{skip}{ +The number of lines of the data file to skip before beginning to read data. +} + \item{nrows}{ +The maximum number of rows to read in. Negative and other invalid values are ignored. +} + \item{expandGenotypeFields}{ +If TRUE, adds an additional column per sample containing just the genotype. +} +} +\details{ +The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis. +} +\value{ +Returns a data.frame object, where each column corresponds to the columns in the VCF file. +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +vcf = gsa.read.vcf("/path/to/my/output.vcf"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/gsalib/man/gsa.warn.Rd new file mode 100644 index 000000000..0b9770b5c --- /dev/null +++ b/public/R/src/gsalib/man/gsa.warn.Rd @@ -0,0 +1,46 @@ +\name{gsa.warn} +\alias{gsa.warn} +\title{ +GSA warn +} +\description{ +Write a warning message to standard out with the prefix '[gsalib] Warning:'. +} +\usage{ +gsa.warn(message) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{message}{ +The warning message to write. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Write message to stdout +gsa.warn("This is a warning message"); +} +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/gsalib/man/gsalib-package.Rd new file mode 100644 index 000000000..2b8d6db9f --- /dev/null +++ b/public/R/src/gsalib/man/gsalib-package.Rd @@ -0,0 +1,68 @@ +\name{gsalib-package} +\alias{gsalib-package} +\alias{gsalib} +\docType{package} +\title{ +GATK utility analysis functions +} +\description{ +Utility functions for analyzing GATK-processed NGS data +} +\details{ +This package contains functions for working with GATK-processed NGS data. These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more. +} +\author{ +Genome Sequencing and Analysis Group + +Medical and Population Genetics Program + +Maintainer: Kiran Garimella +} +\references{ +GSA wiki page: http://www.broadinstitute.org/gsa/wiki + +GATK help forum: http://www.getsatisfaction.com/gsa +} +\examples{ +## get script arguments in interactive and non-interactive mode +cmdargs = gsa.getargs( list( + requiredArg1 = list( + value = NA, + doc = "Documentation for requiredArg1" + ), + + optionalArg1 = list( + value = 3e9, + doc = "Documentation for optionalArg1" + ) +) ); + +## plot a proportional Venn diagram +gsa.plot.venn(500, 250, 0, 100); + +## read a GATKReport file +report = gsa.gatk.report("/path/to/my/output.gatkreport"); + +## emit a message +gsa.message("This is a message"); + +## emit a warning message +gsa.message("This is a warning message"); + +## emit an error message +gsa.message("This is an error message"); + +## read the SQUID metrics for a given sequencing project (internal to the Broad only) +s = gsa.read.squidmetrics("C427"); + +## read command-line arguments +cmdargs = gsa.getargs( + list( + file = list(value="/my/test.vcf", doc="VCF file"), + verbose = list(value=0, doc="If 1, set verbose mode"), + test2 = list(value=2.3e9, doc="Another argument that does stuff") + ), + doc="My test program" +); +} +\keyword{ package } From 097828a466bf60ebf650f45b5f27a40e4a79321b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 27 Jul 2011 11:36:53 -0400 Subject: [PATCH 042/186] ParsingEngine now maintains the list of rodBindings No longer try to reparser objects to find the right fields Direct support in RodBinding for getTags() --- .../commandline/ArgumentTypeDescriptor.java | 4 ++-- .../sting/commandline/ParsingEngine.java | 13 +++++++++++- .../sting/commandline/RodBinding.java | 8 ++++++- .../commandline/VariantContextRodBinding.java | 4 ++-- .../sting/gatk/CommandLineExecutable.java | 21 ++----------------- .../sting/utils/text/ListFileUtils.java | 4 ++-- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 16e02c5bd..9b751cc3a 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -299,8 +299,8 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { - Constructor ctor = type.getConstructor(String.class, String.class); - RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value); + Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class); + RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine); Tags tags = getArgumentTags(matches); parsingEngine.addTags(result,tags); return result; diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index e2e694cfb..edb212f2c 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -59,6 +59,11 @@ public class ParsingEngine { */ private List parsingMethods = new ArrayList(); + /** + * All of the RodBinding objects we've seen while parsing + */ + private List rodBindings = new ArrayList(); + /** * Class reference to the different types of descriptors that the create method can create. * The type of set used must be ordered (but not necessarily sorted). @@ -342,9 +347,16 @@ public class ParsingEngine { Object value = (argumentMatches.size() != 0) ? source.parse(this,argumentMatches) : source.createTypeDefault(this); JVMUtils.setFieldValue(source.field,target,value); + + if ( value instanceof RodBinding ) + rodBindings.add((RodBinding)value); } } + public Collection getRodBindings() { + return Collections.unmodifiableCollection(rodBindings); + } + /** * Gets a collection of the container instances of the given type stored within the given target. * @param source Argument source. @@ -391,7 +403,6 @@ public class ParsingEngine { return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type); } - private List extractArgumentSources(Class sourceClass, Field[] parentFields) { // now simply call into the truly general routine extract argument bindings but with a null // object so bindings aren't computed diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index 028d2f411..86b1be162 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -34,10 +34,12 @@ import java.util.List; public class RodBinding { final String variableName; final String source; + final ParsingEngine parser; - public RodBinding(final String variableName, final String source) { + protected RodBinding(final String variableName, final String source, final ParsingEngine parser) { this.variableName = variableName; this.source = source; + this.parser = parser; } public String getVariableName() { @@ -52,6 +54,10 @@ public class RodBinding { return tracker.getReferenceMetaData(variableName); } + public Tags getTags() { + return parser.getTags(this); + } + public String toString() { return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource()); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java index 66a428369..29b97d07b 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -36,8 +36,8 @@ import java.util.List; * */ public class VariantContextRodBinding extends RodBinding { - public VariantContextRodBinding(final String variableName, final String sourceFile) { - super(variableName, sourceFile); + protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { + super(variableName, sourceFile, parser); } public VariantContext getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 10573cf25..ec3c96d83 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk; -import org.broadinstitute.sting.commandline.ArgumentSource; -import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; -import org.broadinstitute.sting.commandline.CommandLineProgram; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; @@ -100,7 +97,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { loadArgumentsIntoObject(walker); argumentSources.add(walker); - Collection newStyle = ListFileUtils.unpackRODBindings(getRodBindingsInWalker(walker), parser); + Collection newStyle = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser); Collection oldStyle = ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings, getArgumentCollection().DBSNPFile, parser); oldStyle.addAll(newStyle); engine.setReferenceMetaDataFiles(oldStyle); @@ -121,20 +118,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } - private List getRodBindingsInWalker(Walker walker) { - List rods = new ArrayList(); - - for ( ArgumentSource source : parser.extractArgumentSources(walker.getClass()) ) { - Object obj = JVMUtils.getFieldValue(source.field, walker); - if ( obj instanceof RodBinding ) { - System.out.printf("Found rod binding for field %s of %s%n", obj, source.field); - rods.add((RodBinding)obj); - } - } - - return rods; - } - /** * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. * This report will be written to either STDOUT or to the run repository, depending on the options diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 7d4e47a94..b8e39fb61 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -93,7 +93,7 @@ public class ListFileUtils { * @param RODBindings a text equivale * @return a list of expanded, bound RODs. */ - public static Collection unpackRODBindings(final List RODBindings, final String dbSNPFile, final ParsingEngine parser) { + public static Collection unpackRODBindings(final Collection RODBindings, final String dbSNPFile, final ParsingEngine parser) { // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); @@ -141,7 +141,7 @@ public class ListFileUtils { * @param RODBindings a text equivale * @return a list of expanded, bound RODs. */ - public static Collection unpackRODBindings(final List RODBindings, final ParsingEngine parser) { + public static Collection unpackRODBindings(final Collection RODBindings, final ParsingEngine parser) { // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); From ada2f21976a618b8ab5def511f0d9c75acc09bcf Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Wed, 27 Jul 2011 12:27:17 -0400 Subject: [PATCH 043/186] Revert "Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable" This reverts commit 9c81ef835a3ac581d4eb9cf1243e30df20a46795, reversing changes made to f23d3ad5aec1c70cc1ecc48b295258aa70d30c7d. --- build.xml | 2 +- public/R/src/gsalib/DESCRIPTION | 10 -- public/R/src/gsalib/R/gsa.error.R | 12 -- public/R/src/gsalib/R/gsa.getargs.R | 116 ------------------ public/R/src/gsalib/R/gsa.message.R | 3 - public/R/src/gsalib/R/gsa.plot.venn.R | 50 -------- public/R/src/gsalib/R/gsa.read.eval.R | 83 ------------- public/R/src/gsalib/R/gsa.read.gatkreport.R | 64 ---------- public/R/src/gsalib/R/gsa.read.squidmetrics.R | 28 ----- public/R/src/gsalib/R/gsa.read.vcf.R | 23 ---- public/R/src/gsalib/R/gsa.warn.R | 3 - public/R/src/gsalib/Read-and-delete-me | 9 -- public/R/src/gsalib/data/tearsheetdrop.jpg | Bin 50343 -> 0 bytes public/R/src/gsalib/man/gsa.error.Rd | 49 -------- public/R/src/gsalib/man/gsa.getargs.Rd | 57 --------- public/R/src/gsalib/man/gsa.message.Rd | 44 ------- public/R/src/gsalib/man/gsa.plot.venn.Rd | 75 ----------- public/R/src/gsalib/man/gsa.read.eval.Rd | 111 ----------------- .../R/src/gsalib/man/gsa.read.gatkreport.Rd | 55 --------- .../R/src/gsalib/man/gsa.read.squidmetrics.Rd | 48 -------- public/R/src/gsalib/man/gsa.read.vcf.Rd | 53 -------- public/R/src/gsalib/man/gsa.warn.Rd | 46 ------- public/R/src/gsalib/man/gsalib-package.Rd | 68 ---------- 23 files changed, 1 insertion(+), 1008 deletions(-) delete mode 100644 public/R/src/gsalib/DESCRIPTION delete mode 100644 public/R/src/gsalib/R/gsa.error.R delete mode 100644 public/R/src/gsalib/R/gsa.getargs.R delete mode 100644 public/R/src/gsalib/R/gsa.message.R delete mode 100644 public/R/src/gsalib/R/gsa.plot.venn.R delete mode 100644 public/R/src/gsalib/R/gsa.read.eval.R delete mode 100644 public/R/src/gsalib/R/gsa.read.gatkreport.R delete mode 100644 public/R/src/gsalib/R/gsa.read.squidmetrics.R delete mode 100644 public/R/src/gsalib/R/gsa.read.vcf.R delete mode 100644 public/R/src/gsalib/R/gsa.warn.R delete mode 100644 public/R/src/gsalib/Read-and-delete-me delete mode 100755 public/R/src/gsalib/data/tearsheetdrop.jpg delete mode 100644 public/R/src/gsalib/man/gsa.error.Rd delete mode 100644 public/R/src/gsalib/man/gsa.getargs.Rd delete mode 100644 public/R/src/gsalib/man/gsa.message.Rd delete mode 100644 public/R/src/gsalib/man/gsa.plot.venn.Rd delete mode 100644 public/R/src/gsalib/man/gsa.read.eval.Rd delete mode 100644 public/R/src/gsalib/man/gsa.read.gatkreport.Rd delete mode 100644 public/R/src/gsalib/man/gsa.read.squidmetrics.Rd delete mode 100644 public/R/src/gsalib/man/gsa.read.vcf.Rd delete mode 100644 public/R/src/gsalib/man/gsa.warn.Rd delete mode 100644 public/R/src/gsalib/man/gsalib-package.Rd diff --git a/build.xml b/build.xml index 438e9c90c..60c678591 100644 --- a/build.xml +++ b/build.xml @@ -1089,7 +1089,7 @@ - + diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/gsalib/DESCRIPTION deleted file mode 100644 index 6116e8c66..000000000 --- a/public/R/src/gsalib/DESCRIPTION +++ /dev/null @@ -1,10 +0,0 @@ -Package: gsalib -Type: Package -Title: Utility functions -Version: 1.0 -Date: 2010-10-02 -Author: Kiran Garimella -Maintainer: Kiran Garimella -Description: Utility functions for GATK NGS analyses -License: BSD -LazyLoad: yes diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/gsalib/R/gsa.error.R deleted file mode 100644 index 1c6a56046..000000000 --- a/public/R/src/gsalib/R/gsa.error.R +++ /dev/null @@ -1,12 +0,0 @@ -gsa.error <- function(message) { - message(""); - gsa.message("Error: **********"); - gsa.message(sprintf("Error: %s", message)); - gsa.message("Error: **********"); - message(""); - - traceback(); - - message(""); - stop(message, call. = FALSE); -} diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/gsalib/R/gsa.getargs.R deleted file mode 100644 index 94613bf93..000000000 --- a/public/R/src/gsalib/R/gsa.getargs.R +++ /dev/null @@ -1,116 +0,0 @@ -.gsa.getargs.usage <- function(argspec, doc) { - cargs = commandArgs(); - - usage = "Usage:"; - - fileIndex = grep("--file=", cargs); - if (length(fileIndex) > 0) { - progname = gsub("--file=", "", cargs[fileIndex[1]]); - - usage = sprintf("Usage: Rscript %s [arguments]", progname); - - if (!is.na(doc)) { - message(sprintf("%s: %s\n", progname, doc)); - } - } - - message(usage); - - for (argname in names(argspec)) { - key = argname; - defaultValue = 0; - doc = ""; - - if (is.list(argspec[[argname]])) { - defaultValue = argspec[[argname]]$value; - doc = argspec[[argname]]$doc; - } - - message(sprintf(" -%-10s\t[default: %s]\t%s", key, defaultValue, doc)); - } - - message(""); - - stop(call. = FALSE); -} - -gsa.getargs <- function(argspec, doc = NA) { - argsenv = new.env(); - - for (argname in names(argspec)) { - value = 0; - if (is.list(argspec[[argname]])) { - value = argspec[[argname]]$value; - } else { - value = argspec[[argname]]; - } - - assign(argname, value, envir=argsenv); - } - - if (interactive()) { - for (argname in names(argspec)) { - value = get(argname, envir=argsenv); - - if (is.na(value) | is.null(value)) { - if (exists("cmdargs")) { - assign(argname, cmdargs[[argname]], envir=argsenv); - } else { - assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv); - } - } else { - assign(argname, value, envir=argsenv); - } - } - } else { - cargs = commandArgs(TRUE); - - if (length(cargs) == 0) { - .gsa.getargs.usage(argspec, doc); - } - - for (i in 1:length(cargs)) { - if (length(grep("^-", cargs[i], ignore.case=TRUE)) > 0) { - key = gsub("-", "", cargs[i]); - value = cargs[i+1]; - - if (key == "h" | key == "help") { - .gsa.getargs.usage(argspec, doc); - } - - if (length(grep("^[\\d\\.e\\+\\-]+$", value, perl=TRUE, ignore.case=TRUE)) > 0) { - value = as.numeric(value); - } - - assign(key, value, envir=argsenv); - } - } - } - - args = as.list(argsenv); - - isMissingArgs = 0; - missingArgs = c(); - - for (arg in names(argspec)) { - if (is.na(args[[arg]]) | is.null(args[[arg]])) { - gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg)); - - isMissingArgs = 1; - missingArgs = c(missingArgs, arg); - } - } - - if (isMissingArgs) { - gsa.error( - paste( - "Missing required arguments: -", - paste(missingArgs, collapse=" -"), - ". Specify -h or -help to this script for a list of available arguments.", - sep="" - ) - ); - } - - args; -} diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/gsalib/R/gsa.message.R deleted file mode 100644 index a2b909d3d..000000000 --- a/public/R/src/gsalib/R/gsa.message.R +++ /dev/null @@ -1,3 +0,0 @@ -gsa.message <- function(message) { - message(sprintf("[gsalib] %s", message)); -} diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/gsalib/R/gsa.plot.venn.R deleted file mode 100644 index b1353ccc1..000000000 --- a/public/R/src/gsalib/R/gsa.plot.venn.R +++ /dev/null @@ -1,50 +0,0 @@ -gsa.plot.venn <- -function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0, - col=c("#FF6342", "#63C6DE", "#ADDE63"), - pos=c(0.20, 0.20, 0.80, 0.82), - debug=0 - ) { - library(png); - library(graphics); - - # Set up properties - for (i in 1:length(col)) { - rgbcol = col2rgb(col[i]); - col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]); - } - - chco = paste(col[1], col[2], col[3], sep=","); - chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=","); - - props = c( - 'cht=v', - 'chs=525x525', - 'chds=0,10000000000', - paste('chco=', chco, sep=""), - paste('chd=t:', chd, sep="") - ); - proplist = paste(props[1], props[2], props[3], props[4], props[5], sep='&'); - - # Get the venn diagram (as a temporary file) - filename = tempfile("venn"); - cmd = paste("wget -O ", filename, " 'http://chart.apis.google.com/chart?", proplist, "' > /dev/null 2>&1", sep=""); - - if (debug == 1) { - print(cmd); - } - system(cmd); - - # Render the temp png file into a plotting frame - a = readPNG(filename); - - plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab=""); - if (c == 0 || a >= b) { - rasterImage(a, pos[1], pos[2], pos[3], pos[4]); - } else { - rasterImage(a, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180); - } - - # Clean up! - unlink(filename); -} - diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/gsalib/R/gsa.read.eval.R deleted file mode 100644 index f1d49092b..000000000 --- a/public/R/src/gsalib/R/gsa.read.eval.R +++ /dev/null @@ -1,83 +0,0 @@ -.gsa.attemptToLoadFile <- function(filename) { - file = NA; - - if (file.exists(filename) & file.info(filename)$size > 500) { - file = read.csv(filename, header=TRUE, comment.char="#"); - } - - file; -} - -gsa.read.eval <- -function(evalRoot) { - fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); - fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); - fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); - fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); - fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); - fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); - fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); - fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); - fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); - fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); - fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); - fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); - fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); - fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); - - eval = list( - AlleleCountStats = NA, - CompOverlap = NA, - CountVariants = NA, - GenotypeConcordance = NA, - MetricsByAc = NA, - MetricsBySample = NA, - Quality_Metrics_by_allele_count = NA, - QualityScoreHistogram = NA, - SampleStatistics = NA, - SampleSummaryStatistics = NA, - SimpleMetricsBySample = NA, - TiTv = NA, - TiTvStats = NA, - Variant_Quality_Score = NA, - - CallsetNames = c(), - CallsetOnlyNames = c(), - CallsetFilteredNames = c() - ); - - eval$AlleleCountStats = .gsa.attemptToLoadFile(fileAlleleCountStats); - eval$CompOverlap = .gsa.attemptToLoadFile(fileCompOverlap); - eval$CountVariants = .gsa.attemptToLoadFile(fileCountVariants); - eval$GenotypeConcordance = .gsa.attemptToLoadFile(fileGenotypeConcordance); - eval$MetricsByAc = .gsa.attemptToLoadFile(fileMetricsByAc); - eval$MetricsBySample = .gsa.attemptToLoadFile(fileMetricsBySample); - eval$Quality_Metrics_by_allele_count = .gsa.attemptToLoadFile(fileQuality_Metrics_by_allele_count); - eval$QualityScoreHistogram = .gsa.attemptToLoadFile(fileQualityScoreHistogram); - eval$SampleStatistics = .gsa.attemptToLoadFile(fileSampleStatistics); - eval$SampleSummaryStatistics = .gsa.attemptToLoadFile(fileSampleSummaryStatistics); - eval$SimpleMetricsBySample = .gsa.attemptToLoadFile(fileSimpleMetricsBySample); - eval$TiTv = .gsa.attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); - eval$TiTvStats = .gsa.attemptToLoadFile(fileTiTvStats); - eval$Variant_Quality_Score = .gsa.attemptToLoadFile(fileVariant_Quality_Score); - - uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); - eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); - eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); - eval$CallsetFilteredNames = as.vector(c( - paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), sep=""), - paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), sep="")) - ); - - if (!(eval$CallsetFilteredNames[1] %in% unique(eval$TiTv$jexl_expression))) { - eval$CallsetFilteredNames[1] = paste("In", eval$CallsetNames[1], "-FilteredIn", eval$CallsetNames[2], sep=""); - } - - if (!(eval$CallsetFilteredNames[2] %in% unique(eval$TiTv$jexl_expression))) { - eval$CallsetFilteredNames[2] = paste("In", eval$CallsetNames[2], "-FilteredIn", eval$CallsetNames[1], sep=""); - #eval$CallsetFilteredNames[2] = paste(gsub("^(\\w)", "In", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In", eval$CallsetNames[1], perl=TRUE), sep=""); - } - - eval; -} - diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R deleted file mode 100644 index 9b3ef1ad1..000000000 --- a/public/R/src/gsalib/R/gsa.read.gatkreport.R +++ /dev/null @@ -1,64 +0,0 @@ -# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last. -.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) { - d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE); - colnames(d) = tableHeader; - - for (i in 1:ncol(d)) { - v = suppressWarnings(as.numeric(d[,i])); - - if (length(na.omit(as.numeric(v))) == length(d[,i])) { - d[,i] = v; - } - } - - usedNames = ls(envir=tableEnv, pattern=tableName); - - if (length(usedNames) > 0) { - tableName = paste(tableName, ".", length(usedNames), sep=""); - } - - assign(tableName, d, envir=tableEnv); -} - -# Load all GATKReport tables from a file -gsa.read.gatkreport <- function(filename) { - con = file(filename, "r", blocking = TRUE); - lines = readLines(con); - close(con); - - tableEnv = new.env(); - - tableName = NA; - tableHeader = c(); - tableRows = c(); - - for (line in lines) { - if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { - headerFields = unlist(strsplit(line, "[[:space:]]+")); - - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); - } - - tableName = headerFields[2]; - tableHeader = c(); - tableRows = c(); - } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { - # do nothing - } else if (!is.na(tableName)) { - row = unlist(strsplit(line, "[[:space:]]+")); - - if (length(tableHeader) == 0) { - tableHeader = row; - } else { - tableRows = rbind(tableRows, row); - } - } - } - - if (!is.na(tableName)) { - .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); - } - - gatkreport = as.list(tableEnv); -} diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/gsalib/R/gsa.read.squidmetrics.R deleted file mode 100644 index 39fa1ad32..000000000 --- a/public/R/src/gsalib/R/gsa.read.squidmetrics.R +++ /dev/null @@ -1,28 +0,0 @@ -gsa.read.squidmetrics = function(project, bylane = FALSE) { - suppressMessages(library(ROracle)); - - drv = dbDriver("Oracle"); - con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD"); - - if (bylane) { - statement = paste("SELECT * FROM ILLUMINA_PICARD_METRICS WHERE \"Project\" = '", project, "'", sep=""); - print(statement); - - rs = dbSendQuery(con, statement = statement); - d = fetch(rs, n=-1); - dbHasCompleted(rs); - dbClearResult(rs); - } else { - statement = paste("SELECT * FROM ILLUMINA_SAMPLE_STATUS_AGG WHERE \"Project\" = '", project, "'", sep=""); - print(statement); - - rs = dbSendQuery(con, statement = statement); - d = fetch(rs, n=-1); - dbHasCompleted(rs); - dbClearResult(rs); - } - - oraCloseDriver(drv); - - subset(d, Project == project); -} diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/gsalib/R/gsa.read.vcf.R deleted file mode 100644 index 5beb6455d..000000000 --- a/public/R/src/gsalib/R/gsa.read.vcf.R +++ /dev/null @@ -1,23 +0,0 @@ -gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) { - headers = readLines(vcffile, n=100); - headerline = headers[grep("#CHROM", headers)]; - header = unlist(strsplit(gsub("#", "", headerline), "\t")) - - d = read.table(vcffile, header=FALSE, skip=skip, nrows=nrows, stringsAsFactors=FALSE); - colnames(d) = header; - - if (expandGenotypeFields) { - columns = ncol(d); - - offset = columns + 1; - for (sampleIndex in 10:columns) { - gt = unlist(lapply(strsplit(d[,sampleIndex], ":"), function(x) x[1])); - d[,offset] = gt; - colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]); - - offset = offset + 1; - } - } - - return(d); -} diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/gsalib/R/gsa.warn.R deleted file mode 100644 index 7ee08ce65..000000000 --- a/public/R/src/gsalib/R/gsa.warn.R +++ /dev/null @@ -1,3 +0,0 @@ -gsa.warn <- function(message) { - gsa.message(sprintf("Warning: %s", message)); -} diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/gsalib/Read-and-delete-me deleted file mode 100644 index d04323a6e..000000000 --- a/public/R/src/gsalib/Read-and-delete-me +++ /dev/null @@ -1,9 +0,0 @@ -* Edit the help file skeletons in 'man', possibly combining help files - for multiple functions. -* Put any C/C++/Fortran code in 'src'. -* If you have compiled code, add a .First.lib() function in 'R' to load - the shared library. -* Run R CMD build to build the package tarball. -* Run R CMD check to check the package tarball. - -Read "Writing R Extensions" for more information. diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/gsalib/data/tearsheetdrop.jpg deleted file mode 100755 index c9d480fa05f4acf066e3bf1cf469db47b8a1afc3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 50343 zcmdSAcT`kOw>Nmm8I+tsL86kgK!Zw_tbjx%2gx}$K|r!(1q2k33@SP2B$65=$3{eQ zw?NZC(|pJ0eeb>Bnl*RLH+RiHQ_bn~qq@@GRl92MT~*if*XzIyO;rt500##LPzOK2 zH5^Y*F_Vy{1;pQ3lIOrel`w&^Ui}<#Ld>p4FGV7 zK`diy=WPRGB@o-&dfT{y_-{FyuCDH2`M4(_=Ckqe0`uarf|&a+U%)pQ+||JBSTH{A zzv5ni?PvhG!1%w#@n``6H5&jl;rv&+WBPx1g+u?%+g@DkpW<-Hy#Rm!eSLilrX^|x zfGe-->&y4o*H^`09SZ=^?P=rd<@ZlokPw_P0Qi6U|6er!t8@UE0%-ulKYjFnkN?G& ze^QA6Kv6dUJP8MY$Kd-N_~vB*fSbVppa+2QN&v37_Joyg{JOWgJ$KZz(Fa*Q^ z2|xsp1;D2LtAD{%z>O*ZD3lSARCzimN)`S$=6?@9b^-tJ?V-CzpqG<_qYtMtIJ)ib za%#BQ2;Jor6A={!t~Y>il>jGu0MOP31i;+H009m)C^>(*4e}d@8I+@cab6RL<+(O*{NI_6oi1!~Dc=b2!^%C$9z{SD&`}gmF zM}YS?5aQ$G5fBj)5fKp*5)u)Ukq{A+5)%@VP>_(4k&%;=6Om9*Qjk-EknC?Ef3xG^ z{>@83ModUd_WwFucLO(xfmZ~DcsREK+?zOfH*v1}0S=IQ0&oWUI|=KhuHnp;|X`}zk4hlWSK!KP<^ z%+Ad(EN;Lze{TKS{=Kt{{BwMAiaJA|U;L#D2f+ImS^r7d|AnrbAYHg%AK(-Hr3(kw zAAIm`;uG8wC8So?Cwk#Yb6YHenD#;Hhc7)OT;c`@I%}^fQhIKQjXTJ{r2RwL{~2MC z|0Bx&ld%7yYY`yF!vQ-F?y%^ zC12}S58UFGI^;7<@420k3QLY+`wa{Rb{ENT$x}JYozgawgv%0!->88T$Y#(azEP2& zDBaU)!cQ>;h?jEk97SSvoytMP-cBv#%>q&Bs4e3t-_ivTmOe*IAF_J=hnD?x##Q01}Dmn$0^iu=1<*v%)mh z(BA-;oBGMlW20{Fi|Ika0E`pyL`akpj%$kyE$_I3(-T%^<*W%>K-{JpIAHANhVhTE z&)nJy_ASIAxz*(PATOVUwm3P%hw(^>P<)(#sfTGjSbwgpr28?>umlk8e}n&fVV-Ry zf1xYH)S{U?XcACeo@7zrTd99O?N1r3hnqe;#`1N(_$A$h;c5m&zkeaYK@KHjFw9G1 z$eB{}SNM%YX^A41&rVXCIXK(0drHKLFY-=GSqo|3F18$Ls@pOBoCGQ~)=x`bMC}vY z!b_a`tcOdjt8-HQC%-gaPlR9Eb?cCGEKT^VB@lq?*v~I-Voy`{!AKFw`4gSdtsK~ygJdyskP>O$Eo*RTzBAfBq(`v(` zwb_D)OV!SX(>RffYjN4N=CDPv`s1d55(QV^pVpD0Gt=)Pn;L+$Qd+>3)1i@%Nbmus zXixd{q>wQBZodzXQ_h%1= z(bVhV<}<97@Uny+$E!Z~)Y?kE2226HmL2Wq-aUJoH07lpa-@LMgQ0311$2=t0FO#v z3vm)nQ)G}&yCOv1DbjRalClIQtEP>_JcPpeUuGbkH71P z!^nyVf@Rzdnr>zzkOnIc2l6p`fceE~;ZIcYreZzN1~x)E&q3m#eVeiHXAW>9F+wvy ze`HU|Qf?Dbq7bkh59lmUKUrr-mlFKEm^R7VoDFA9%0uu|;Z5q%&=L%7DpA_*Bxfw# za9~aT6+^_8f6}HQgZT~M_kJ^s*`;v*ZF%e2^lI)NynMa2q%z!?V;e_%SzwBkleHceZUb5(bK)HVE%p|Uj<48%jo7(L+ zO*(-`s;WA&h4^HdMKYTmd@pk>J(X|_l!kG0`q*cR!-KXFAEkb7mKWmQmV-qLMoh0p zMT~Kt2NdGvc`xRLIUet5b5Z12d}t*HdrtR7g?4GPolV`eBy5NMc7g%G&V5#+l9Bx0 z>1jOmlZ`UGHu5|2KBZEIiTN%$un;}e!PeODY4xSq+rvp(&R-F~rH-^?uGD=jR>b3b zezXhU`*4rxTd1dnj5&J{DJ?FgEZgs!eabd>`twbbL-#(wLU^)9TCJI4t#PV-;<%m< z7b)xn|2&ZZe%zKpRYV^|Xcc7BmINoOsCeG>xP^TqL%3+369oRcu*2!;>O+W_gMMA}cr;ThlG{^L-1Nob!&0jHvG%UWroe2i$V zz!}i%j+#HcfH~kvRQ}n{r*d{750d~|mR0Xh0{0U2nwhpC(L(T8_IdzUs=!(Q%zIB; zAqaLz0A&W8WMBHzZj6#X5TTTtYKx=*BsB>0`|y@0ZAb73!#%u}h)4g>{3w^9iW)AZ ziFYI8Ko%0+Y<7UX5#^nZ7ax)-S&Ctg3CEWmclmY=xV?X<59_inNXRx-88~RHc3odF zHRFG|k_P9G&MRT&(S0lPoLCT)ywit$>=Xe<7+R46^z+&zwLEAqSv7Qr{Ks^palfI} zs@1!(B)`5H=n6Cy^HcgD8tH2ih17z}a79wZxd&VW!}ZWU>k#v6;A1J0;~F3}Tf^Sh zLFDIt3JH5OEi7DhV#Ugl_U+=&)0iPkNFT*a)q@_7pu78z+&q6}$YwUX(*+nBBBbMl z>qA|1+hGl^Qua-4b#2if@)uQPY<-@Sz6oZ=h9@yoy9tqARdrQzOr%v?@@r@}tR5Yn z-iM-J>LwJNJi&5Z0~=bw*T9hDH4xK}Zn*|jbm0&zZBrM zYX9W_Fk5H$RXAil9>NH9hkV@=F`>dolUg&it5rW3&L&x2=;>QN>!bLBP=3QMm`it; zo-nu4LRIt-hAkN0=~ZJ%E-QbrTp~2mwG&j=laA^^w{b;V#arE6c)N}R*MezW16>WM ziGAa10RP8T1H`4j?uRfY5J|Jq^~%j-^9hpSwL3iDaROGrD#%{U~vdO_Z=2X& zyP#h(?{uIe+G|_yz6lvm!aPDI2lt)(%%N8O<%1be1b@iVH6VCiU>Y`Qg)qGaQVKZo1F|irFgHJqL^RRtHQ>hRPI9C`MZZleR40SC&b@zzgwIY8{ZGK=T6i3nQrxM1!{N= zeD&w}I@AXa@5#!NYhZ`_={jkZzqfIDdC*Qh^sH#bO4);x(&jH6B|k-^&&gdIbo|r zu-r(JAy2J3XPTKD;PEAl@NQAgQ$|ZCU=Y|*qTC4F(1-PYb2gKO_?u65&dDo*R0xwI1(iwQ(k^1u5rE z;x^&M>!}7xEFbVz^Ys)05#toXci%gafXs;&7NQKx0G3gE4}{Ee^rPX~v_Rf^nw;OK zC4*p_RN=F>cH-+2f#R5u-Cxu&3Q&m|`6mB>HBkRXxS&1330@dXCh0#Y6%M6TRdN8d;Hpp^gAVWYCEv(Z7(@Zv<7c8;Ie< zk&0oH^VVJJzA*GqmO142bA1q$u0&E0q%@G>tyx4i#>v>t#)q>ktl;#4O$A`(=cp#CE&G(0 zwcQ~bFkDDr`4$qCj_0OkYVN%lF#)Lt@D49w(wZcj)p$7ofbY;DgmW~;3Ixa-Ja2kT zRSod%%EB{}L!D}HKM&kZCZ3Q0-VQqH+5YiX572p)uFQxxM+-zZ(nM%d;qkl)UWZ)+ z#1p1~3>W8#1Wp_G)bggxfWR9yc|AN`;CMCzxZ%(NcrQkZt^cyZrIZI20CQkjZI4VeO#^-~${kEdJ74QSF=!|5y%e8;$+ zvu;a>Z8F}@QT5zYqOv)*VfFoxYS-dBedBbw-@SgHF62}gdG0^@lB3+sYXSvX^GJzd~}yOY$ZgO;0H_zVXEbh z?=>Lvj6D2PKvEn&F4ar{J!HLtg9LjcY$~UqS8D;IUyf0kzBA1w$DZ=R+?hd|Kdfp; z!uk%^NSvV$?8}pcAY89qVj>vFK)81SEHm5<=d3mzD|ij`*oIMu@FIueed<189wSxL z*I9->BmLtrlIxc%#!84PWVn4Y3PmeWP5s7}m}B(!JZJnN)R6qcEHTTdad(W{T3S`iqk_##`8hFBo9mGBT8^b+CPI3D z^coS(|e;+%Yt#Y&{9W7qj*X2v(%c7Lo^?oW0D398K9Pv@H6Xp_Dp7 zdPH^5fC~d&ordGm17)yQIdt&lJ&e8;-b8W81LRSX^>oaa&<_C|ebRELzf!Grezdlm zsLpe7v%0wsG2x{a5L}f*)*BafV;Zi3g*p?oaVffP5KDlrs~Wi~4J<~>of!mIK8oLA zAD_7$!~Dvh{u)qdB#)2(Zp=DK^jmh_VU2c8geGcQrZy*x2|I(OMmY2zEgBmmg$J~C zww2HhP#eWMyQ-a02b=Y`xjM-0yhiCHUuY_0v;Scn#Me9kpFIC{$hptO~K^pg3HTVy_F8LAiqIBNu zx2{bI%RRZ(Yd}vu(NlUMr1zGFNABD7dbbJ5{p>JBDEiJ1m}SV*tHLm0jB6KiFG;W$ zOIM)W*!9D`#^&m0tZx0ZYt}Q9TLcT8D(1G4s!EEd&HADCy<#{$OnZwhY!7vE293lb zRfF>2M))=u5NUeGT+q? zzmgZ>zZ6~cj7IEX$z%~$@S{Ww)5ngK{s{`FYk>Ob#dHV@oZ}{%!vIzfp7{5`=Ey8uzZ-n)?sj+|5gzIeF8qzFd39Fvj z=h-iz(z?FqyV{F;S@({G9Pnpz$~A>V8^1c0_`S>RhsC$0sZDQ2kMsLsx=$zQK%uKe z^YLQDYcX_q{rNOVf@>iDbb@jPqcAP(Lq!$2)w~ZEpcn*!NB@bOTRY;0Lr;&z!f@DkpQ}dxdIjTp-Oyg_3~}4+SP#O`fRI zy-O@3)WB`^roe-)xK;rZcmSQjhCtLa+3zfCY7g6 zQ*z6vaPj1KdzMs)U@WQNx^TiZz;H5wtMv)AoqF;G=MJ{pr|yQBZMdw^?tNDJu$4Bo zoyeVgGm3WDLsmeq+-UHA|v({)NkH~j!-f3DCKBdmdcw=hv zZ0SMUcDi%))2F?dPQ5LcMY{C8xI15vwT`r$GwdR+u@4fdsR+{p0^sO9o4!s>5DkTws z8H{kVc%UJ#g>94>4YZ}>_2BffmQj-|QHGaK=IyA8WE6i8GP!(|XXtvzKa{yOd%@M& zpqB95Y|g6tOp?hkhmlUtJ7v!bKOhAajr1QP6UN&EogUabPQu6lEnfs^`jz$#e*N;p z%_qZ3`clnkre@+WE@C}3QVUmQoBO%eEuN|n=Bp}6UA%zRHT}tU6(m>x8r8iJr~_hX zfY~9%ratQ$cr+`N#clk#v+2$!6QP=W2M!vEub;jDkQ(_*x40n21U;3JY0?@8@n5ZT z)?uwMs!PwwxIK5cCG$Z18JLvDhi9)q0> zBSGFqpACNA(MNl^lcBA8vFy3eIvS!+X0tAQ;_<(!{E{cgQRNUXGf$64t)T-MdY?h< zgwYlxwdT?hx4UAs+FM*SZa2dYi(8Us`h4*tOc^xn^ftm572S;S&!45lE*#=)EcT9j z)G||2C)&=u5ArN0z9*UfYpZcwKP@4os@Zx+6MI@6jCud=@SsN)673uiHn{c4`0?xG z9V>--Xpc6Q+62QCHI#M@^g1ZMt8!UGB%|F%7Fh$sC1HE92Vi$|?Im7mO7v$W!Fy#*#e8-&c* zTgq~ADTZ-H)qUq-|0K{!TI|%bsNGXz?8^`;QJ<3#%rD8{bf5kUZSl;L%!LC-%J1oE zGCMlU#yhK>Mu<5Kz4-cC+$of*D;r6rgD6dCAoi)o8#GOcl4Q}PaA7bQwuzT<|M`k< zbv@djG!{RGlUnX*kpoMrg4jlRpuaFgxECTz<1yxURVzb_(Q19hIj7?(wmJl3@WrA) z(KJf&|rq* zkZs||ee)SVU&Pj{@QXm(YnfBeRp#xtj~^lG9W|@fHTBb8@gDvW&~98d(2)C*FpGpz zYYZY;?fCET?00Q|KAH<>C3Ts-_+P$PipSsY9gA_GJS{ECp*g1uSvDD{|ULXW3LIWibH2hY0=)I^CebSyns5ws1Gc~b{5 z(Z9kqcnwgU*!`%%a8;od|EyX5;o*@H`uKHGyPy5}{d}>W2coXc+apCi&klU~@6tUg zf;jNN=kCsAuv*L}i)GjdZ@Kc?PZjmUtejk-sr!$mkdtS$mF#fc$8&?wy4Zs?$idS~ z5$u?v2-50xyA|T7A1jLVZA=QpSELIL=3ugD$g5)sNbE~s;1Fp^OzosDL^v1hBf)9O z$ejj#hDysG>|{eWCT&nf%~4w|E-wc>IQ+iggjiDg)93StoUAd>^igzUo2oQUy?w{B zZgdNZ4gS2`TPJ!3X0WytjQ?@j;Qe;Rmb=y}%t*{+S2uGF`lki;SspaLh(U*|A{Ta+ zoeHgwSnBB%oD~xO2$T0O-^tO*m|CnG;l4-^SRzvXhHGw{p9%Iv9WRDf_X!$QatUzJ zn}{K!NqPO!0GA$)g|7KIV@sx%y5#QAEaSG|%WM`d4NDQ!{^g@Dkt5CCmXvr=buWjA z;)h^`SDD1JBW=}O2DCqmR`oO08(Rgtc#FT#Ztx9sSe;8>a9H4_giL3Ok?iH1$M#tv zjD@3Cd67cz@V(IAM*El#3nkw34*GX8b~GXy%l*th4p4qVzE0jYsQCHHTE@mo$2Qz0 zN_qn`!I^RE-HTFa5rq=TZhiXaWQ{3IR=xzCk|`^nvd5Y5L?w|F|uk?t6}?G52q zl2=xtO)00sBxr9vS3whGP40%-XcxJYF<%e2Mg#F=&CR!haxqV3Mt(Y!Ull^vi@Qj> z?7LWZ@>!9DB^4`sL8z!}fP^y1sc0wN*=x^(DT_9F>^Zr8X2Kz95ewb&n+o-lYhcFm zHY%g6sRO1?&2<{Z83CTp<%IpX z1`v9#vJjgCJX9Xq0?u+^+-*fAZ%_$0lZZ>>xbb_;|0*}MQHn{JOub2OShS#SGxJT< zK+;CY=Dl7uj`-du(Ij4rUJ3dkBh9-%I)3IAt;$taYbFJGXWzBWRh(Yv*~F8erlXRd zq4)p1o8@6{_APyB`SvL7>!mCeu3Hgo%J{4^m#6|$g!ZFxNG3~g;>A=Vr3y|RES#1# z@mN)9FGqV`hL(PDL4Fc9Vn}y9S>tGcm-`f+L5)*LAZx!XI0RY$(4R8TyQ#O0ePrQs z4XE4KK3^pn>Ih`O&{9fdqiE6J@4OkH3E+B0x#9nk<3%nn8*+~BM+B%9lwj|i=QSUl zId^SxBoCxs z_!kF6&i|=fun-;q=fd2VdW|5TU{`JQp2hdKEJTT`!<2>Ot&0pGb0*(7zq>;8J%i-OHB8NW05_ zbNqR0CVJG9Z1!&BDwc}$MPo}YFh5;cG zq3?yJRzMBgZ1m3`Qo-3SJC1o9cmCQd+8Xf`*}h(e+S9T>cw#X zEmfulyh|SoYUVwi5JHG86_N5LbTJ*Slj8U0#hSME!%IO`V?0Iu_wmaX(jSMmX!2a$ zjzlIz0$qMq3QiQ;R27V3_L=-t62H$W$Skti-Mifh-SYk;yuQ}$&WxR0y{k>uy-GLN z{;X1wq!G4X+~U|HvvN!Ck<0FPCAP7!xUf+O!ews2Do)V7EJP0}lz`aQ4H2I=ji_{f zyZiLjOU@JpX%gEZ-_r{+cnXG9`cg4$D4)iiIBdd-V$QO${#!pP%can9=}U_ETg-WG zV%pRP_oL$+v@{A-8Y9tI7_{ES+-zX;4uU>aO6d!tn5QSh?;6m&J10A2u8DI}%WhJ2 zBz_I}7nk+5pJuO2Pu0HLS0hW^f*#RX1fpV6FE20!!&f?pSPZoqN)RkF$Psl>+s_{& zTekM7ZT1>~6E={=4ewHq#4GbEFnsenNMGBviZNacD@RQCHOXm&?-(=N)i?mgfFkFj)R)ZRd+=p)_g6pJd>R(FGaQ5B+kSa(}!Zxf?oCyQ_uL*+xA> zyA)i|`E*@z$9F!A|5LFLD)&?ne#gMTJv6gTux>vl2mONGiuIsN?m!J%O)eG$db7u=WKf0F$~|mOxQ8!;^p?W{ z3JG|*_LTLL<>IBb%K7Hk zp!9g7!64s4K+(9=)UdnAjVW}8WovjZ9-#UzQ!==Pm&`Akc`_yp;H-wzBnB}*2!ee* zq@A9jJONrZn_%I~`68mT(M(M8)9oFj=t*3HajFu&m)o)!zHu2;u^Ui`CxdEA|+yj$jS0{(dMPk#p|gPhSP54mVNrdmFy%P z>3O8NKqOlTlpQQ4+MdBv1DHw@NI#SnRh)UDOkxL_zpgY=8!ny35O+1b#nUE>+V7-{cJ;5T!BwO$H1K0Z++@Lr5 zc3OWVgxO38uR5~~N<~damX5b)r@iICt6L#oRPT*`m3Sx@yRpXhb8-iUu8~@A=7cw{^CpDwpKRmTjDH zjol;PmtvpID}nAv99FgccORZDgUf>FHQ@4IN6WE5uO879>G~nB_ULxJnaf}*&OTG& ztJF2?3*)7!JQ4Pp0+1<_L_gQ|2`;@bLf}jZ`rmpWotz;Y=z+?4M=Qp5hH7*gg;noG zKU@8g%)n*i*kEWJ*e)%IO6@8+nDQ5B10`)HG>=T zEZ`0`{T~-*XhInI&Z16gdEZN94?GY%QEo<-U~fkl&_0Fo&A_d49fy6xQDnPRf%ZJ= zWWs>X?nttbJ80#N_pYa1uT52ABkQfGf4_M)hN>l#dLLRk_FJfT|bJ=$#Sa9EcK;~V`Dklvevtw_69``TyELVjt~)%|uk#l8B1Td&v- zennQON%mtW*Erg5xg$1F(&#l`#G!Bm2YKuu*^1>K!AgmGt^2{bS|_pk?a6!@N^bKc zU%Lxd%k5>y!pfb4>}qPhl&q9Az#hRLQ4g|mB`4yr-rO@VH!Fcjczsk)`C($qQ6_({M&`8~(= z5L|+|xi9fbvffqXph;6tKmiphK^|6c;KEp+_DMby)lpYRJq}c9LsuV7wWz- zW}xw?B3l);8sIv+d-u^Df#SErtAy)N?K9+QnIwhcnn_*O;_fO-LSRN!s%8XCDJaIU zhr<-Wq$0ADrZqobRbmxF&4pv~+*x?dkeZ${}@xRo|7| z{rWXYxane)v(4Sl18ieGp`RYsAKZa@`5eftsZGNh8)8^o0vKL;^hZ!jvSq3Pp#p-s zR8FWM^F|MoXAfP(#Yf0JDu!;^WZf-%o)&a5Ni<@G&>r?#y9w=4hPuDkk*O{TH5_1P zPb%qF3=!Nwzhh9CJ4?HXMb^fhx|1(~{!4_tT5Qch>IavWFUhuwsmDEvZp%Yj2sjPX2b5Dsi@r#Bl|1-LX9atAH#DPU4L=@~Dk*nHvK`7}! zZGiZIqQcZ^){(;GyV7H-8tdj*qSp1Z+68U1hbj81>m;H*10gSvby$)jxYk zca+G^i`E5AQHRi46URnc@eIGDzDOyrwEdRRrBlOu{~T!& zAAh6NfX2;y_KZ2RfZO4ot4rCpxsF6p0iBGDr%Pr|7Ykv$V{2bxV`{tB-&)mAyaIPM zN4pr%YN-6FlVNLQRwcPh@TY(@mKR$)U%wS?)#vaGwA;Q(%%y#PpMw{HfV=Eaf7U9LuiB~n;ZBc&b4uA&NqeT50})YzyDUi%UCAbw3?g` zlh{~nUa)6Mexd2hQ(h7G;A^Szf-Il6K|X1Tp{=O z6L6`KQ51$wtzgVR_zz9m=~$8mq}MMZ4TwuYf-p;DRdwmJ8{G(H0+(;yNDj&kWY}t_ z;;WrYc?D@o^QYCZdAvmd4oEMF^sak{=Rc-lTGX3%sv0Y8t87oZa=)MZgxDj-z>`>A zC^IBi*rXyadZEN*_hL1-JRt3Ge0-u$PW#|j>RqEOzX=CdT8LA=YER+_S2!1u+eTX~ zS`CrjUr=|#TzlZ;6kHLoq>eU2+6?$z483w_N;V7SeUPh{B{+JZpuXIc`sFz1ak=B1 z)2$namj+a59^rxe_laGtuc{RJRb1Tb2S}Za$XFk-CRH%t5v9g|hOFmtFic=p)&{yT z3=2@CPd9cm9-W7? zw3mH5Yn_$5e`nSW)nxb$%M-AqfzG~k!-OFzqoD2;zjL?T8> zF1|Km(+t+hdDH}JSGKloa`d%eUe};DCk(#D)9<=uC#r5^yKRV)K))zLx53Ag5Qn4i z(%3Nm0N!3jp_wo)vNf_D!xhEa3Jw2z{S?)^Q{vCmxW&ewalVSptcF|D=~V{Vonfet z;H3%G+L=cOX2=KFRdcuaZOzYFQWurC=%#ic2+fp)EU7OQ*tr7wd$S^R)Cxj<&JmMT z*%6kF!f{{a(3%@pGa%Th12jpBO*lG?Qri=vVw+10P~<-E#v1{${%Eev#i zeq(%B%B1giR*kp<$q6>I8tw;Mn1D{uy~^ua{|LHcsRlWi1)qiRBRXndhd@HBHCR|6 zq*iNYkFuthX^BQPA|;!>aBIS>2?8e)6&3m}A3=MR6!|gkXu(elA-agtUia|Dg4oi| zhPXqYHCj+>Y1o&0x3KjWIw`db!WTQa|`GPs@_!C|=0mtYTy5rM)$@_u>U!?uw_?-n-QKv@H!Kg`rG9XlNc+@+YxqBUfD1Qrk zAN0g7R=djJ(~eGk#b^#0#n|{&@rJMdy(i~^>0T{Y_6Dx{PST+p!3+1!+uJxO!lti* z`qflqI*J;hSO#LnFPkeyi z?K$yr^LdM&@|a&ExU4>g#v6H>t1%o{Ayv8C{G5JBR1(mLdwgFP{;um4G+hJgPjA;~ zYcVMh)Ms>w^n$qt(Yq28>9oG^k<4@TQ@RtT0(wM@Q&f4DE)(&_uZm9}D|$6_N=PfS z({UJup1pwQdSuJn9v$7elW{BIRmN3%Ym-Swx`jhn$3%)7M#~j`guc@?v|`=&xP9uS zWXj@WtHlM2#fIvEc7r{+C$GOg=$6oJ$jIQN<7H&5ZSGos-}*|51EB|95S=j5uU^Bzc2 z=ij(mK}C3Cf^7GuhsgJGWn*vuMgj8*!(Ppat9RaJyNw+`&W}okJ4*QA{NKcUEVK5g zj-bJpYZ1q1efVe>2!`6bPXwb2dh+H}5S<0cK9j7Wflh`N`ZEs7BV(hkOR0*(hs1@d zeu-byo{A@it8@K;*nkHg^Y5bHT^e9i(Y_xsPZ4Ja3hTcWnO4lzHb)h3XO|2n4_DoV zDA-*ed?yyd{1BE}I`CbBO1mf({)pQd8BJAmc7tl=Re_RL8f}z|$ZpP&Kz)QqggZ zB$X?N zhOV#02ZcpcFBRHlYjwqHcR1bqqzY8Uf4#+tdLBIIuz9k#YXkm*aC5UXRqgO#gyHf>Df;!PzkuQ!F*%i}9wm%KI5)G>&6p+4YENWLe9 zEvu#DUWaMaAAQfM)6Bo6!mDzH#9CVYIxi3in_g?0=vjx}m|sti%-|C)1{PjE=_6Islx#ZQKv<~UfRlY!!FLSmE0QC+Q#7nQj1sL2 zy?lVtL-I6K&O3!M7p(tE*hr#OaM$UG*f4JJaiPs$y#MTlklP)f^B2qJ=@YxrM?+Q= zdr%t5i6}Yu*SrvFpEc4XyUk!HsITiSDdh4yRN%{yWxI!NM~TK6O&Dqicr5CB(G_m#|5NWS`RJuTQmDOG&G!|eJ&-%n$ z&Lh#zsMsL-QKo6l`=uuciJ6Z@+C0S_#V20nwjZ68;f~PJER@-h-JbN?^zgYMD&Xp? zB1_mr7ee*GYCSftBuWe2SNNNGE~*C0u1#B8wQJh&J)57^(@Y@yy_+RmR(`mThX*1} z*tWL7u|EC*QG_L>x|Bg%4d|&!$I0Kkl)TD$!NWSgG8F8DAD;+m{;EZ<_LO(FD!e9N-LSh#BREOQ-x4q33C;Tn@3<>l9qD1rDL$vmWP2`uaK8*yRFh6a0&Ujq&t zi`m_42yg;fq(NnX8z2K$nwx#>yhw-s`?U;?x}G)?)$mY(UyCcQHlIa{LQ6Isw_f+Z zv$IKbkUt08etu6?qHa(s;S>tI``G2Kx< z|KH(=h0!mf3;H09?#Z|7+TJRBbG{H;86g^toJ{qofb8W|P-=`jul0ap&P8$7*CE); zPCpkWfi4<^QpiK##O>N;0S4E=HGGEoEhYGl$H|8OCmThK`{gnxjK?Y_bKjo?Y} z94#;1W*0(l75Y3S$qL2gVs-IQG8i{$Y{>Z!c)-%2F02Q_9QiYS-PQ3q`Sul|^KSPw z&<-BJrN&H+CSL=7l~{&z@$)9M6;k~1^j)*QPT|kZvmv+q`BSr2GTl!CO#rtli)O0u zHn{uB_X<2$yJVBj$gZnodBIES-%=bmI|YV(HxFn`e4&H^VZE!r2HBt@LvmV`!*V$sqFXd^lcMzm*Z*EkBBl5*Atim7d!FG-#qpj^hz7_5KCs#k)=E0xB+=?A-p-!_x>T=VSmH7ETFG1C@Am( z9M3`~8t@dt^tljCqNQr@Ga+lHAC6wA>3NVRoc3`2rx6C!qiiohyF?y9in-qQZ+Jw#H%h)XrC zrRDA&at456HB5;t`n?#fKZG1qBv#j=uYoVZu?S$KBzK(J=ylj^>ZCWF6og5o2V@z-l?% zD9xQZ-){4v@yGU^ZqLG;1iUX2e0J`eQd*w2{%KRxIx+V9Gk;<7OV(WA;oiuTs-~Qj zwy?Z!dy8h3U%TENLF$_@tg&Uk6=|*T!^SM}rmzeFTDlM29m>oXl;%83Z3uGk+(Cm4 z_qc8@$_g|zvC`l zi4;z#jlkK2`Rus0SDn52_$F%QHQsDjX~9Cy3@`k)?xoNTM=(WF{JJ|Cy(y>z0wA8(j6y0nB|59T&x?t9gJInq!Inv_R-8Mn7dp zqC`Y|HJivyW2gZTXMwUiEv%AENX*F$F%@3HzzefOTW(Z4k1ZBtWll;(mzBmChnU6y z(&9)bzeI=C$ahR9#RfXwO8=wff;Q=xlAqSi?Y`l8PtO1#?iX|-@gZ7J$aqg(Rm}># zN%+e4Vp0S^i2nB`&M)m>{1# z?0*sVrr}Wj;oJBidyJj3j8I6(zMGIOSt9!~l?rJ{2pKaXWDi9VqwLwT%Qp5Up{!ZL z43T{%8h6dobANyT<9{5_@w|ARm%K2y&;6NeIj{3PFD1}1A~?oRy#A;r;S{TSy#C3@ zc8q&VI^gLEyxAM{-NtZ09+ttq{7SlR`EEWKx^hu17s9!={JcvKEua>h#JIkx4k`Tc zz(#G)qNACa`2jZ|Y1rM>aMF@}I+a)XY={gD){wNalJLojNWo zHH(rL`0p*Ru2ZhHRuHoEY$B74y5Zx|5C+4e{h*_b81()9F(uB5kwTVVwg&RwAisC4 zPoz+;^@x#jA!=FMTvD5JOp`fpu#b&Df*N6yfUVgVCt*9*F7`GG0{~ zDpN0pu;K)ww#^_+dt(rhMPRamY0IMM;ikCnknKH)&LY-LfaUL$<<5{B>+%4^0Q+R( zimC`{;WgF8uwyETdoY@jP3Lzp=S0c*QdK`D)bdk1$k0h6g9?+Ap(W=pji^U&=7NfB zu>$B7e^hL>65Q>7J|Du}`S{v7k+rE@ZLT+yv~AEb1d5hdX(7FhhERfyQ%B!~7Bbtj zPG;qQ{9Pn&P*MDQts0_pDX>}r&Q$QC=eW+CbmQ;gCFrpC&^=@D0SXIueXkE%LpVCa zi=u6-CGWS`nChn{Y|Am^b%qa@oLZYqEvxY}#823Ps{fX<;47-a8Do;qTgKAqO^8YL zx{!gD^=BC2YnQmbHo9uCx9~n2QruM-0EsuXDP$N&2ZpG16#?^KX2sq&s+h(R}X}NMU%#Q;3a{E!H-vmm4xT^^%+7tW=g*beLIuAzusFS)%0iB z)ieH3pxx-(x4&Ae*g*sr{VSZk5zNRY4#{rBp``n_rb8CL%{;f|Oo?%fz9aqC2~u%_ zhfU*G7?pR^tSO5iyqEO=iC6nhMb9BO9!9_Rc30GYuk}9m!W&F;)I9{C^~elMXPY3J zM5+naP4n;tM{8JhvwnfF;xC z*){esj-7F3X`jaZW+oRz-Y!hYrF0RUpBsq>b|1uG8!@~26vuFHlN%GN>n zK(Hoh6rX)fNap|s{auSE-%g)t(mc)$UHbXdNagmSoeF1zSj?r`Jn9|cT^dMtbRhfA z;O@{A`u{d3f+X4VoPnpp$EVz8Ya4|M!tX{#>FOveNw61V&PTY9*z=f0HzSY`bPDw` z`0v(I6#;gVIfD428NoxnSpjH@?CU#fJ=H%ORkIL&MTcj&x_kmDDkD><+;9PpzEv3P z+@*@?xlU)JauL(}YmRVBFO>leGv{rY;-e#3bwqu9u*u}XTe zJ&C#jD1v-1XxvQbI)|R|lB2!pFbO4TxspP=p6eSFdDbQ$EzVjhYKWdvzJD%v zFQ4IQ4`$*LR~yWJd_8wFt*KHGKIqgu`?oW5qSVvt z^0}C$2)q38`l@>jmp-wbWP`v8Vf6`A0ljW)(@IK*13pS(Xs|$llpcF4e)BrXmbWMH*z0VS;-1x43DMyd%6gt;5jo*T+wdZodUIGfAI zLsTkmpezjl{Yn1!a;BrewF#;a6W)Axiu67(9xW-MHm8qn)9Ctm=WTdkUyj=tz4FB5m1MGS zbw=__ihf+;Lb>Mt%~qc$)0kB}X1?|sQO5AhR1I1>JkO}<*%(1#PrB#y%V|TlYYN6I zi&yJzjl>qAQnMABqR)RC11rS3oIbtUcm&aN>b6 z-k^+qntCd-JQ)jJ(afpd0yQL>uPc8#-&4KOQQzvAE!;Ca+{gWNb?p~*aT~#c@dNc| z2trsAUEO~LLS-f#%*Xc81tw8ZEIDOf^X}WvpVgF2?w3p>ed4Fb1d??>q3SoJo;*9r zI)@;dPe6Xc_`2ZqFIYj84vCZCJdlSFT=HE|U)GsBiqg9}sqd~4*HO8#6*O-Ic`a$_ z^u0_a?qQ*{w!JUo3RMU}t|gi8{#eC5{mcDiO%ERDh8cj1x&uC*B-)aSB`of~+o`h| zUp}9_X04o4clO17+t+CqPX!smK%_(r&4BU({0Qk!|3So2-V_t+1=6|tTuf>RPGq~y zlj8F23OFX_w>G4UtV)ld_yh7r6CmT1$XY$h~+H3WH5D2EX4+wh)ERv&0nkB&{ zD0AF&NcN0FUyz82%XiCrf-}|B-*;Zli%8C=sMw)7Ny~{8jqosX4Im#yVF`a~MDlkd zjlV)!76aV$Uz^75#ElKDgG%>(&1PcpZHc5mviGtXo--YZf)`Q{gAeS6CH_5rdZ!+U zTN8KasWT5ERG`k_74{xY^^sZd^)E8*0k4q446XGhyGpA~}jP z+}l|DDq-JpRs(lMNeV8LbV)ybz^Q~*FE$M>Jk+=2iweg7gGlrhecx#T+3&LPN=H%VSDTx8h5hiY zrqT~ybExh+_aSdNM(-ivf$8zp6c*TmlmD12T1Y8VqbTHp$amVS(H~4?)c$u|it`{{ zj8ix=HZ({R?#m1hbR)XdVJXf=nTrnJBQ$p1;a~Ad0%3<@ zaWd~>TnN!{uYV9fC?J>K7dS5qvi=j4c02smprSy7Ez;SVSR0Gd3$69f`c}cPzdZZh zGKu-V@~a<4_v1!bPvwA|ci59=Y`x$=$l$!bZ7A$Xqi29J*N(LzRs+OO6MhSSOq#E* z-8*+Zd)8vTK$T!BdM)->F+UNp$DM;~NT&+P_7|v;a6@BZe{K9@@s>E$tz4Uz zp{3hgciRJwUMqzvSgy6a&=Ec(FzBIabC4xrO_~5@JtSu;KZYDj8q6RBKEv@I!IQAU zD2a1mhZd5|r8+fG?O*Y&=PW(n&+p`UWQmi@=W4z`rF=`jELgXWd!PVVt@J9V;Uv^; z6SAFFc&G+d;a!siN^cnY95ITXS_TYqh8%*}c)^B-Fh?Ew_}ve~n;9v;aE&;sP#@m5 z8=6>fj@a(E!l&;+Kf#L?-bV8i^OqY+!!PL`z!tPrl!l+#zJ3{2pR&%#CdNt?AmH=J zWZJVDZPzRUOEq6xifYl%mNPGSzwoe_qnsyjbq9Mb#8Emsc6DXJryC}AaO*2t5}SyY zBB{Fx{;10Iq#D&vU+H{9=G|2YQG)!^tl2BMNHOze+8*;KPZFJ}8UfhaMe6n0P}oT; z;@!g`BJgm?%C)iD{A${^Ed85RZI=(*Wh>Uu=LPRRP&L5YG6wJyMmo-hfnXgE^eM_G z*r#khe9)F{MK=CsR~!3}dFu)hYRMb*7%j-l)R~!Ag7;Ciq+1qcwD(^A7)TGXo)i16 zwm*y)Fcw)$MQPVY@h2N+ypvMD*9{T#?2F?cvM8*R8sE61$9!$IM9Y!go_$29rsdlS+{ce0W1jqnp};SUcG zGg2R%e=Yp-kGy_24f4lxFSY-oUB4I-J&bM9>ccFST_y>Y5vN=}&hF|fR48SYTWO@s z-c`st5&VjL^*#ROUB?^O1s{Cs7L@9;oyPb=Sus6^AWHB7D^7JKRf430rzU+kVSPn> zDbe}a!QQ%V2&j+_79W)6;u|KIM7OX(m;`;c>%!NgLlG9*f#hk(Uw`sDTDv}!A_pW~B9a>d2{^v+ zp@9cW9zW@)h&8dn)%T8xJuTy}Z?u^F7|fefwHWSL<&RCX8KrZy69esN*`Q%bAjQ02 zs6ZS*JW@Xp{D$xib8;Jx3++04(Zgjt@qDY=(6=-j_JyFsIjTT1x$z?aG7Ond5hr3a zCK9_OXn7s5P$J#a4VI_|JQ_cAoZUBEwnqxh{{5>X*=(9Bm7nFUr{ckVeXb9!OWH^d zrf^|;EJA!cTnPt!+inBnu>^75!$F>8&#?Z}Hz_CmD_-fp!hBu7RjTQdaXF9&5_Abx ziQvMl)_`>pS`HORT#30qH{`@L#BYVf!+AmBZRZE#aq`(z*VpQM#xDI~r>@_15^)w; z82cFfmb6Ldz!Do)p@T@S!th&__i)&dA3MGDzA$*s*@-QnW_j{Ej5V{5 zHFpBUO(O%O{~NhQ=#zYp4JFb7#lzoDU1?1i&b!L${84Jz0;G%LR27C#|8%GXfu*Ov0N3Vg2@sxH>3)a5p1y$HV3vz`A3 zm%chs{;#(u_UXDAZy@&ve(8yV7G2{7!fz;LS- z7%n;^_0}wku?^mjlK?7vXJ_ZSr0a>cv4OwAcEjvOt1;0s>TJgyspTXOGfoSKjrqn{ zFJ}sPwR$d15ezdZr-8-Qmel2)Zj1!0mbeO^<| z%PJz(6%G_p?wh_xgA$EK;W$(TX&W>D7|!LN-G}AFf#+i0Ax)@_g*P97?9ZHDp}(oksT9ojpO3R&hxsOq zeA%!>j}cx&DQ@m^pZ#?1fKkb^2oYtx4^&kev&qldfT;4?lus%4{^wG@o$ zD_3vXeb)@pdsgeXn!mSnp;!3V2;bYKZ*~v^HNw!|#3~{M$6zQ3to7pfsMl8W6}*6w zV!+L|;U{+}XTHfE@?;x#k^h6YyT`+&Iv&x(5NXMnP_! zh9^?bk^%o>vI+knACO(ruIAxblD1~-j}@8t>#AqumYo>%tR%9???`367?8Aq z+2gGZcPD3%V7sv-x2fxg9VYm6;raP{bO|7)((hEu%!S6CIG@)_cCSX=ak(6t-mIe0 za2vD)>eAOP1%Tg_WS$Tz8ilw-5=_y}ejUYdPGQvVxusY|!GqyXW)U0BBI|^XFgklD zVL}+?PK!Yc0JVc&JPn|eSLh0~Cp7Y%>%6E!q*U;Sp@4`uPeu!A{j%3BO_5?rH}y#e zmmq^G7MH{;9z4(%U}`r|yTo`~qTjABNZ^(1LYnhWIfy=w{QLw{U2%7CE6Ic@B@M){ z7^(BawKBA>P$6(;c5@;nOg=sm!|cyjeql7T*+JsH33t8`>zAb8^_r9<(gYxP7rHA* zssi)CpSIpTYG0gGm3Woih}RYUzC#p4Uf5k`tKDBL*!ZQ-#d5 z9a|xo%3GH#qdEGcA9dLU=NhI)5ER|!Y^U}D0&5RSTom`J34V85GoCqKHa*e6cL{Pb z)4!Tl5TJh(IuNCU;u5v38J9^P2~FMeX_33S^QZy6zVPSf=dQ5*r#F-FMjB+BCqDQ+ zXb)Yci}vE(WT2sn`{m94_yxZj4KAg{VML==Y<4L0?1y08j-jaSbDHP_RosDgipPiA z?76Oq+qnL}N)t7+U`&2;^Fz=;P2|F2^&%K%w<$Gw%ZqjZLm6OBXWJntz$K`crUR*_ zq+bhsf6b%S4Xi=82E7esXsu^2`VTAorty#nUkZF=tL-0TPlwn}=d3RB1sje^kdDrQ zLQYC+wh00qxd>MOt3y(1NcE?#pND=7-*Er&XI<|W?A7tH+A_jgV>)v*_(ICBjw;vh zBcnpHnU>?0<(0Xm)~6@QzshMXmNf-ebZZc@k-x*Hx69sqNRod}6M7y?w5?6Teh5s#)pwl%0wNVG*X6yd+%}?=9hf>m*Jm3RR5Pcf_;u&!zHWlY zNg8>cdRH6Tr$agCokSzuUEKU?WW3}4dmH@SklS;+vQ(aleq1Ab023E>8rEM8H6bB( zJIUr&mAGfDygiKE+0gTK1!sp8Brld53t0^J{2d}Y0iG0n0M;h~pZN#zwpPGH6VbA6 zmgo7Ssy(hMzIr3*`YhXbFWr=9H_2E9my!suDh3ZAPK6qBP?rHhCRu2Z;ElZA?S@UOUO3 zY(5<~(N_x#Pww5qeU7w>#HtS7{hc{^>C14`Jl9>os|KOvEMK#_GM`6aM|kg= zB6wjTraWgME%P5lc|dv@?rRH3ChTbt8OmOxPU;7i8exxCIF?e)KZAqzMb3wqgBkQ5 zdh}7|)=GnNU%UUSJ1rkuh=X}zbrx|FnnD?jRb|hOPVo5uS|Nky5nJ97fxq5T50tEm z^A9xdJdH9pecd?vIV>zOc0f8(`31aohqc#M&>h&so5yR6y8K95ah*g4zO%fd2yV5qsumF=dOL(m4cf#=f@VV%^&a3y%J`L`C|1N=ch;CTYgs;XCdF<<+B1Jo@m4R1 z*vz`btzTOv(09zbV>2W+$%mnZK#-g2-{1V+vE@pO`0L<;>MLgj{Yztgdy2Wk9w+IX zR1M!U*H40oD(+pelnfauhNP9I2ud5URzsMdt-hg5wk+*fJd9E?5&;!y3{>gZD~1P? zS?`FJ>PqegUy>=MHV33l-LbHmbTu?M1t(_b#m7jkdJEZRacE^+s;*5a>Hx*ZY}Gs8G-@QBV>UQRgvX7?%$o^yBmq2SsEV zZ?3gm2)0@*9Mb|Xh=5q3!I(Uy^!Y>cx4-vHNJH>A$cZ~fM@63twTuQ;ETe-{K$)X` z`0T}@ZDiX|ZLvU(;n5mt z#{ycnb4BO(+g9;%NKiq(JmS=F=^B%4n0j&0TnU6v<%Zm#@QxX`&ekQ#V|9q+S)0e? z<0Ac-$VY{1b0t!5vZgm@qlHh2O@jWjFWv-=oxpmeKZe+tubJHDG1#&dtE2gj1iXS@ zfjIOWjGZ|p^}Lx&{*-|=L@B&@F@31|Tx9To9QYS_VBWJY2Oo!kk5Tz+h|Zo*OkSJV zJD`YcAq1Nc3!8+MxTYv?`SEO^#+~{FWv%O)@lMkqY6*IEzfayGqn`}=2^VmMV|EX~a#|Aj9}a+*#>^^i26a}ePhB;9$ei@( zwNmo?a5uPoDWBKkI76CdqJ-&lA!7l2J)1-z2u1=*vfb8q*ZQVBas5$DHl%_Q|5Cu& zZnlHBVvB637@7DkUPpcB^;?%s>nUm2tBY5=z85}cAWP(xow~jJ4{|}eqHH@iXF>Yc zyFcErC$)KK6|lfIe@ zvbos(3xAGX1P<)sh%Shy2zJyp!j3%BPzH##1R^3d^T7!aFM#QBORFdZyfy^6)SYua z^xLUl=^puU@awPP)Bq&Po`F4mHkw6eJ?YQl+5uHiYjtyXEsRJPk940=&Ld=JC^g}Y z-fd}}F7p3S_~a$BsKqSPQbySBg{i%T3{fCs;|rJ)Bj_ymbkDB&wP6O3i2gSH!@inu zo6+vJr^}K=LT~xxSh?%7LnKFK#o?*uW-%o~ngG+D;=BP{@z7ZA4?|Yeed5I48Dc`; zvOe){g+v1$Bj{>g?lXL7UikyYPDkP%4c z?^WAQ7I#xD#bUp{Aezr&k^Wq9@7))N#4cWbVy``YUQ!&w6m3ws0SCD$MCZV#;J8vd zJg`T|YAxRY6|CO8%uf=kz`HL(q2cu|Gx=FI(`Qr6gI*dt#tRN=HUxN3z;1=0HvvlS zyYyxn7vr0zLvY;N!jBpXncrsX`H749q{Q1r92IhB*%8UuDlfN*NN>L|jjV{~Nh^+F z@&RrAf@;~4BlX8aZH+%5(}(2%y@=q&_+g{Fq=^%QFrEnl0tQ5d;CL6dgNMXyG4Hm? zmO=L}nc5BNc(=zr6BB??e!=T}_6|u_(FH;B|L^r(J?whMY-W%txj++ z`R5LO<89X1{a~yjp!ci&P3mVhy9W;#j+7)j8!{*CT3T>*Z;;vSYp&iwKR@-cl4@|5 zvuCYD-Zi@1OcP+skmRT<7vXrX4Pp4=JgkW$I$~m4K&Rco9MWSL?>$w(+jUmB&nY{| zckbbi+Riz%6hHqmnY*jYf#wo+HnHeOsF~nZC{-Y*2hk{-r0=l0cY1Ni@Fb9oALD8o zP3H=4>C1U6QJQ^qjL&mO?5B_~s#_R!znhN%w6cxzFZcVfBeW;*wNg5!O5`L@pvE2bZhZbGpO$Pel1Vme@+8ae>ZoA;$Zfr)6;f(<(;g5%eZB49Ze z-MxUcp&mmn9EpKA*4Zl@Z?)NyQtbZK>RFE;2WnCCxi8ZUYcvw&9ObXEpYIoZ_GtmK zqoKQI7QExP*d6v28XCV;zisqgPdv-r?s4&C6_QRONSBxif5K)|iT|(`|-@i9rO+JhJ-Qj$`)IxvIQL%?PTGEaM`|Ka&mQfWb zhv{G@wN1D+Of`ZWw{T?#V&+ekZ{8CW&whUPY&*R7^Mu zI^8*QYw4Y+f$KZXNy|1ZN15G+5aa$7+E?PJa~lcTr_8;AusIeE%k``}yfU=R!4Zk^%is;qVeEH6GE7PsVkFifC;&3m2L?Qyw`PCfl-}qlj zE?mI%RN5|tfjtxqea_EIiFmL(U*qbg+;6)J?2cC=BLdF4yB0PvOYEQ0>~=c4bRbnT z`Tde1?Pf+&tziZ-GWf*QqPw+0h2n{H5uc|8CG4j>D9+=!hJO&MAovJFxD+>^=&~<# z8LB?$gH?T}lSoTX)0Yn|#Pap<9lYF?xU;Bo`OLhknEdz>Q=0c67=3JOaojjn?~!H^m{cv6lpKzL@!; zwY$}%K01H;Q{1U4eYL=dl7Z3{z+u1(mgV{7^`zO&$oqAkMrWJ4PcrnWb`1=R<*v}c zRO9_0V*?!KEuU$g*0&B_3lsr%`n9|+fT!crUynAK# zmKr}`jNpK!=&@`s0+2o4T_L~{A15SsID%93q1cecJT7IEud*{uIR52SUD4!^XPTN) zh;NVEpJ$YkGF2J=1Gk-_Y}=T+^uZO;a8_GVM}Zb#RdOXaC%#FU4PjCh{+RCWoc;CS z=#R9k(Bc#6&uSv6GPl0jUt*opElu{RVnfgQAs->v`As+a>mzJ*Ro78f2Id;d^Y5&( zv=dMKU_7OnIuQKEr$*IQ;x`GpyG03W**jZ`9$Fx`6Vk;1Xg`K`_W(VIA3@2H9~o+O zB%4^}C9GWh&h_U0SkQ^#E2Gg2(z$!+3z(kUhP+gH5-usE=>Opc%p*C>1JC*T!3e^% z-Ip2MJ?4!2djB_PrRzRYu|~Ebiu}uxuf4nf#tWG{Hrm6r48S2+1zG`G6g^LXgUiPg zH@!lL&(dtb!*>l-qg*?T_##BT8?8cpa$V6g(Cnn*5C0WE?UXa< z(AP_bDE?Wf>kRQA@yPNh%G<*4Yw?rP=2wh-*Uja+pBtZskPGL%-|#PFICES-+*jBL z1kNk(MG?~H5qQ?P5L|+j+oQK6Tf$sY6$*+MoW9Z9Bo~w2PVzp0Zni<*7 zbyN<5b%`x*Aq;Qcmj~jNa9R~wDjZan=lce_UnXhhm9)g`TL+1(kkuP`-hQxr=G&jF z=@70L|9zU__|b#1G+c?~_mmhTaxEW#r67G*8iYae#DOpsF?B$dB?JnX-?^o)P-Yxj zZNKJ{D>+f-YT~mY`cbeyT4Yq-amhk#*12Z_kT2^~PVo{V?X4q-+2P*qz|B&_1_I(F zNOozR)Ywz~ddu(W#&{@~%>D93FV#;rEDu5R+r;_;(WqOT^VP^79WiQRVO)H#y&Yy8 zCnUulswHiyce;&-MI{SB)dHVK3xB@PkUiuO%@P%dI*Z2wN_inC9@FS8b^T!t#{Od%2jG}NRKti9cPclde zso!1*%`Wq6g=d8_T1()+>^(3uuSP!x^9ydZA$x@bRF9v=4k^cIITu`L?QCoLFr_pY z8yw+Z?7`Vu)SB-2G|peDVq+k&8U35!Wu+9=f=BE{6j5)`+EKE1V9997@O#KGeCTo) zmb(eelA5ni_vaSd^(Ohn2b1&6pW@R0w^K1jjm0&ctgZ$-R)0l5;c#H>l|{ zTyMg%ZW&5-Hhr8{@6`Rf(SOjAOPD{K$@1Im#uaQ((jTU)VE!UeS*EGTn@AY-0vs?7 zMf%fu7nPrl;YA0C)-!){hI!7_IFCw>QE*xkUJt|em=kPC$8-+xZnm$&1`q;(CCC*( ztA7MAVh@47Ys7@uz|d;7-ZKP#hH{VI67QS5e1g`ym6_c)R++YAOj3Ak175lv7`Jqo zynxUhP4Gkg*8GIczCGvw`t)>lQ#v#}vzN=^Dy~IlAOflHFJu2!fA994n{Ooiefcxq zpamGZH>Gk}xD6t|v?uD5OKy*=U4onmdNY|ZvM9J$#a?zFT2XZF* z&QmcV$FQYOc{&FiK;?hCLN0{n3)^*0h9Xr%KQ?qWzJ6OU>!{AyT9x&T?tcfUeWz*obYTK|lX|Q#r3Q{~kbDG3iXab@!zWC(hu*9kh>|gam zfqn9>lf;j@6XX0~!jmdUAXB zzG+MHKUFFYF;DSWAljCx z;yR8ys%CySw&+vHYv83WMY+;CnxJ4JckvYNneFF{2A>kMGJCaTN>xVz8j>(y*cY`7z~=9B}282&--e2G!}lJ@k0{{cJ|OEeYN zTEq6u(`D(ybd_eHZZ&6g33wEdt<2+FW46XA&3wpqs#iPBSaXkWGYDWBkPw=t@)AVH zq}`@!k5Zj&U-X6Ei3}hg3r%uY$OJG+LyZ|~ofWHNs?ERQ?Ja%R{59xuooE_$LRPIP zt;p4BKyz{C%CRj~)h%i<0tPCfLE5F*gRb+$On{SINHohK3HKf+Nn%}ErV6utyk56| zc+Tdtllk?}ZH8W*S{G6Rl~;l637$tPV7563Z?Nr84&?H((4P;X__@)0S@oY5jNf&y z)b#$=458E!a033ppy|&9FZB!wS%bq{hdC3MTW*ILVM;wm{DXuiErul}*0c?FCTjaW zy~y`yt}HSfb%DEd8XmMB$pwj43!1AIR~m&rwtj5se|)e6GBrSCKSm1u6;U6%w6p@8 zTN_y5>ar!{;*#$@K#IE(9Qpep>kWhN4gA+XzKx71(BUo0RCRnA&yg|B`)zKkb{FLB zvg)FofGgQ;hyeqB8{l=Z`BN>D;8bY%T&dN&S9iO2y`8*g{)|PH6{BZXJ~&eaGX-J? zg(UbskQ}$;RKg3EeBHXeyDv{mhw_OXUCvLZXpCOiR2x+v!ha-meK$b=ra#gx%DVHq zwDqK|3@)=28;mHK*qp_^htBIQs!=#b8>LUxgg{8nUY6&ZGNRSIivfA4Nq0Cdf9rl%zEFRY_L|}U70vAcN2zW z>N%)2b*A4F;oiiSM1XTIYswYCfviWv>N}Fu2Ip|Mh$~O;pC+%@oNOo%(v~_AJvk~Z z z)s^3iz0P6#oMunY`(BWJG*vTJU)_eMzRtEFm_I1K%^WHTD~|ZnN+{rVUQwR}Ad!<~ zn5IqeWHh%0+)eMsaqWcdd@}hR0OIn@NMMK5q%ThLb^!E;MOZ-u%?t%MU zjrTZ(GMZ|Zi2PLpeL>7ru+W0e^Pog~k7{q`o=;>>w8K z6gTx^g)Met~!PgG|qD6n9!Vh;7@1}YT0(5nmBJR({j|#cJ z#0YG>L5G6tw`5|+I3f&RKprGt(u+HB8?DX+Dh`q$#%9na(fY`wKkok}UIHR%g%xZh z0#8w`0Y{RmE#o~bccWG3U4MCXQAPVjcb8sV&Y9NKf^!^cg3q7i{CW97<*;Q}rxvu^ zXkMZ6>-8c|qqHa{u03ljyx$OBMtGVnxEf|l%fToti1E_hB<&y1w4Y!ivKQR7C2vK; zuPz=)*f5Lsh}44Z%j#1c(W%;s&e23M2nT;2(B(=`phm7L8Y^mf#CNOCd$M24J%u;F z8{QJ}u+r@FHpgKa?-Lu{PJq*_t(Rj|cESrcVNS!{^*I(hbi8ypCt8f^WtPQA%#%P z2_>~$dmV!TvZ59Rei*Gx&qhjt?26P zcWH;F8&Ya2X&?gS7<$rq!_0%_d?5_o^1szJ2w6*ldbjDok&6M|HvJm=y@!V-Mk^Xm>jy;H5YWkxa#5KEU#qn`A0#x($7<;sj9))TnsDUo%YQyMCtm`nyW)dB)*9>}aU8437)b?hS@( zOOvf^<`u#plD}zRK20*cn z%pn3)baEIffVgEP&U=LewlI@?48xW+<`|cec7i&OJCi@Wq!Y-*29zSs&a4h z#Crx6QA@j-W1+)L^DE#~^4q}`+bx6+h@xKUkR>%Ata8^9Y@e(MC{h^+w(NdkZ5bar zh_uoQKLV53Ez`K=5aobxsWYCcYOQZxc9UyQ? z;CCl`P}ho=dUI6t9~53cmtcIH3woVK8BgB6$vmu!rh-uhIxzhKz6(_g#N>fIpE)-oz&!UWrFb_n1BwF=! zoh3s1BDs_YmQ~wGWE{&{X!S(tqTkpfD?Ug|^R?6GV+0too{Y%mLUxQ-`ONR#ebiF7 zXTq>_cvL%b0LYu~!70{g&<4%HHX3)b5t_Cq8)InbzjG^NWOYww8F-}G6)$A*!|_&pY1s6M>w4_p2IIV;j!IUgQXBe@KIaOwzJ`FFjh0;prXp#k6pgXBFUV)it|T zFt=M@a@3q*&>jAu%YohX*DFz55pZT3HvfM~Wkn*aUH%|st}rnGPw(PQUM&Mf2R>1E zM=z1m`_o0axPSdUw2HNBX{>WsNKPz$dDomH;{uEEb(S-lX*O)E^OtXY`F{A#c^yX; z^d~>3m4Xg0XgDpO3NJx_nOi;=m`p#X%TgH>wpuO3;QP*9`*Zfs=FJndigEEgQFGZh ze>eB6|3SF?Dbn;w1Va;-)|v@utzx^TEWU*nCWYo7#2&S~@9FC0#hE_04upJG zoYs;b4g3Rv8;`Kk|A52i#s{|Il`nz0)fK*Q3Vwi|gI5;$9?pqqT|kDmM`&^hX|y5l?8vV37E);{sWP>B}Od^gZ^lcS|S{ zl7Q|B;nP~N>UOyqs>}VqbjR)wL^Rk#;DZmXqZo?j>9ZIH%xW2oLtm#Bb)^+t5+gh~ z`?!{zzd}7j))YRi_`Nfwen4=E{2@9~CPrE;y6~1b+o~{xQu)-4VM$I}np7y>srLq4 zr+YD~{Cn1t^QRS}>&l{*J1rh}19mFBwrArm@T6jhmVv(=KOb=0eO@|3A~f;kOZo(o zYv$|8s@4|0C#P!C>Fm~&yJdTSC=jYP@LJ}pRrPl6y#@bIPn<`m_eNrN)3b$%xCWG1!MgU& zYq)P@C6*m1>zmvm`E#!33uNrJJiy86qFQb~<3egTkIBNI61bM;{ zAQ30*fP^(grnRu1z-P{xF3Y1q_-`S|U=!bzyC$LwyeG?Uzn!kX+0kOmgCAzyt`r*@ z5N+YTd|P$>ArzcVf6p3#u<7XN+kfPo$_-Fk{9PihYsWA$(J6aAc-j#z5S~5NvwUh= zBg*(1b9|1!;*ZY>SS9T*?JcYCti?Yp=W^6{o&;v;Hgq{iVro$MK6vfCEU+r&!Rx zbji^^qV=g_0O@^rFt}DC*DLW{BBscX$u_|~W~*kAVw4liD^vcVayXJs%&6Y4$eYM`EUn0+p8Wp2l34f6lg5X)$WwhWq4-zLYc@ z`;cv1qj^${4>|xN2H^G_NC$KdVWP4#&_Z%1e3(@qgL}0Rf8*k-^)<@oP0pk(EF&7N zf(4B2=RbCvE*M_TobQIvl%&r+ubp^9LcGKhAK32sQ$Qwhk3kcLF}wwj85H$b=AxR; z#mDHwqwvwbI@=Pze?beLK4CJhfDWQ6c_-85-Hcr{P}1qXusOT$M8j}bg5BTh9B ze}}C{3w~JH{p!y8D&L~Q?I@Suh1^{>L(B2T{k&E!KgPqi5U;^*B#0FZl}4BzgD!S( zBrXhQPLK2}Ujv&KtywUf%i#!{jx?sT9TS^G7Pr#S@y7ixyaq}#w zD{ZQ>9|{VPN_8^DsL5u_Ek1u!0hWsk`AZBIJJ~yCHc`wdQBp%cripcM1yC?7Ax_|1 z`kxt!QpJg7vFdwaUemVm_e1sK+dt-RYwztX5Y#9m%pU`6rd?jGW->vd*26`p@~)V$ zVZlEJ;1_?vmYhYBr3&o~VCqNdUv#UyM8K*s4y>wN5v;y<>8@@Iv&a%-zqh7Rl6MEmC-3J{t`e{Rg?9&gu^*I$#9o^H^R(rk-E|h9&*!U-Ovz zq1NYR{V}KSJ!*SC9k5n+3%6x0{@$QYWnCykFic2r3EXq@Um9eF`@^4L8T8K(dw&9% zbmb&3?k=ot%ap7W@%lByU|wLDq`PZ9;oIe*phHne`8k|5{UTK(j}X>l9oF;LtFsa~ z2MW(J2Pd4qtP36I>eWBba({lYDm8}HBt?P?)p-X@Du+NH2{Qoh!$EdwQST9t(>kEU zAK{-a~ymOzXGmhv{yFhWW za`E~#+S|q268{j_ktAPTd-}4&H#H+(=^0^guN3KEFBrbZL=;}z05xVQ3qyTSagS{; z*-1r1MwA{gKfkoL{5zT^rB$y{b)16?>GH^08vJTr_a^%)A3U&e&$LQLS}&umwp!Sb zyUu;SDN@hsX0R^XMyt!qJE7u&c?UdDqlIX=H0G8qpD&D`Ziwko4-9Ev}=Oxji)7H6JspCr52vOA{EmFBNx6Ci8(_re)dFiDh_tc_B0jzPhxb44&g{K+Yw5 zpr*V?f0fG%U;rS@C?xpBwM8y~wHzeH6vtZKb^9~vTn37JoG=}bvE<~PS{1~bOk=chH5ST^u@KB z*Q|@T=haYG(INny2~aBjJD_GGoTJ^Fa_GcJK*^-FPR&IwveVO39L$C0)#0w2Y|XYe z1-aCoE}rQMc8olc)n*wV527yH`P(X=W`K412;($M%iN=2Loiw%bJ`^vO&aeS`2*Zt zT-{C6PrtA3{;bgSx*XOO4L1gw!DFC8GaWTh#&(>=;*05WBkI&aU1PFfYDFyvOJjAo zh4NCCzWnI!*Jo^w4fe`{_nAMmKRa4k>H!@rAs|^$fhO(Pfr>?ob|;?98tTSj&FH)O@Cu_c@8h!z4GBAoi#uX*zgCmZ z?XXGWlWvK?XqAU=k^YGssge2Idj9xSM#vKn?`lgx3f2nd6RtEa$*Hu0GP$dIm+I0l zJY|`nmY=+KjZGoJIgw3h6?w#(D&&2YnFtzV5xnV z|8XP0CwNNjs#ovqvRWs_#bu5;`gl0%z-lFGh2*qC-~DP%tc9Ti!Ex69&;ERpnM!1c(l=!Mbjmqw zu?GgCydmYfA(; zE@eSLk1Eg(V0-!eb5VrU{dTUKt_Z65{0i5r*?976zNsJeZyXTfP%P(s%`_OX{bs|{ zq#-k1d&HW$qEO(k%_J*s4HKpaj9=Xd!1X;3$Rmo&F_FFyWw(hW3_!t9*u@X$@}k|r zX5RG^En800J$^RX*L=L@AX$0mV+A{FoOaHlYhNxY1`*l_qc_up+yE1*H1~stzR1I4 znVZLcW?h+m{=3R~w)o)d=|Jl$xvbl$rVr2nlPoMJ-hs6260e}yRsqB4(A$CicL^7b ztDEW@o?}BnFP8wc!7-m4Y@dGkrD$`sS(*3yI)d&aP4z1wc2cy_#`&+VuRUQ5V2dMN z&Yv#qzpG4bBZRgGa;MPT3Vzm&0@{>fS3n&8&O+|byN25i9M!79 zJY9$5#Qk{N(W0?sg>Pdmj>LNReTF}6RBQS;1_mZI9W#UcoF9i_fnHfaF%kGwvC7nD ziw|B&Q^!L6E$VAtis&{*Z^wRgzxBj)`oUKQ#)dXXwvEP*!l!PzksT=C#xSx_USEd@ za2dt)EI$|ryf>0`3tm7!{&jcy1ogs?vzphfhV-Bz+#>to#Zh%}WEZ(VOH(t;QupPp z8u%T>lX+7NXS zDM{b=8`^DB?Ppu{?{JgwpAK14**Z$LB;3i5z^gF)%u^@Vy(xXQ3F>h#PNCxV*8>*r zvh`nazNyqE%K2FHOaKlzKee)PNZ(XQ4siMsuq+D}<)PMd6px0nhYL6SmQ$u&uI}uL zSZuqs@4ay@%0Z}_j*`q7vi;*pY|h=uD9DHAu%V6epm8fV*rs)zw>i+$y

Wi2kB9vvK-Kn(WvvTD}Y_TktzT ziFUB2mCX%?crd!Qe|C}H>*4ehGh%#P z)Z}^?jbCD39imTltPAwiL`GgQpBK2XX^&|~)vI*k{%Hf>W*><4w5s}0MBu$DEWQ}& zn5(4Lf^;s+UMInjY(AGkPu7Jg?;tUfY3lJ6Nv3Byl@rqiDf9VCJOVGO*j3Z5E&iql zI_^9aH`sHGru&LoRBjtxz2`3AeS_`2lavCyssKKKoA#t&rdk;=KaeEr%A=vHy_Q&k z(2%8&qskoj$Goc5LtFyOdN`EQ!Me0eNeiO=j z#&?2qu}DRDzp`d8Yfu}XS^-0Y!V#9Y0AnK2{|Inq2bNUeKJLoR5EGHAa@?m$<`th+ z#CvZR363AWyldZI@?Llb-zm(SHV~i&U?!1OW;A}jj$@mD^;e6@8HB^$M43!Hjsx<& zJaziOH?e+k#8@M@=+g&bhf&zR!z5N10J9bVb z2$;Q2iJj)G0;~c{t=WZ8w((&m*eG7N9oI7=4)dY)NH&ij;*NQr`wy19y-$s>wmE`A zsbu;F28nO+^;UfyY58Jx@}^Szj=gC_Mpv+b7&J$AqQpS)AgG)K@{KXb!6lWbbGSGa zJzwRtBIi(#y9CR}V<+=EXW_N;rXJn{w(COAqL*GMVd`v{ow9xzt6Qj_Ep;+u$c)-J zqHpy6Ve;|e^Pdt%dnadmhq$HU7Eh&U_Xhqdi)51qHqoyyl$z0kA3);qR7TOO9`(P_ zJfy!H$kTrEiW}$DYGTBNsvBiLUi~|%)#F;8Iuk?ZV4<3BwCW$e)fk?swt`-H6!_4O zPg!Ra4VOS=L!*6LV{=MFg`Q{ols*)so)CBLp8J~rd}?!|;(2yuEq~SU=*I<`%2>1I zy)<7<&78+_ohEUs0+~$QMzG~uVIypWz8;&$b|9O(g8{&;@IzNN0R+kx<|H&gb8pH| zP*3zTSnEELll6*{>@qntD%X9bTIoZE^3NarR~{7bJE{Qj79A8&0RQ zDA#L-oBsR{On=9DJ6xYVq9ZH>?8B*UTr7^c%@;AN*e$_-SgdDBMbNI+PcSc@B=seJ z*b2gO)6P5Aq;CB!&U$(O!`P{>cTP_JdQ?z&=S;wtgFg=w%@1T+kJk}?UEtAZPexA~ zKH#rAYq($5)b{vcDzCfCJZ?QK=&&clzwTegNqFE!QhTNWk)MHxB3<%-x~KqnQD8J# zvRQJo)D651zi*yI^aj=V^JmQ~lNZ)*B+TzIHZealePkrFII+{cdyU=b*XtDTQw==7 z2Ky-m+R>o7&356FDu|(n=|?LPe@uP><5?>`g33f^3kp()V~?V!AX9Z*s$1 zx%Ih%u45Oyu}?9c%DRWMl@Mxqcy)LQ)%gu2;5t|Vr8*c$kb-k%vqLZ^`L4N{!~CnA z&ztY<9Fa;Ylf3OM&L16YpoSR@$v@VtavM1R@EL69DH74_#qfIyCFnZlgT!41m`R6NI+FyI6j(KlRaGa zE5xica&QyR_pU<0Kh(lqW#z}8a<%CZs1GgqW?k8SV9E zLNEQS@R>!cQ6f}$e{l93ZHHVL71S2DDAV)>*;$}08EV}T&IMxCx4OO4AXIq%N+ z(%CJS5|KygV+i=eRL;#yd6&Xs_gQzXujujlV}%4YFTBnRE{pm8uDSGP^`#PM|F8wQ znv&sOHA8FltPoRnd<8&1F`Xs9K;nbNy;iIgSvleGEp!0v;(<^OqHH{<)vn&{5jyc{ z+;-5@yUe=jWA=TxjSL;9T)K0`V;7Z=0MMd5ILgz*%vl2V7+QN$!|EhwM%hn}!*b70 zSW%&~WR>RknF~<_F;?V4iKBdQRw@~{5*Q=i)fk7`y<9Yp%`x~z)BH2Ng!Qz64h?z^;P8@I8m_IFecU?Dw+o}8?I!9pU* z^rg?oCQLPP^K$ERi6be!6mz1%_b%+@2~>rU@Md3QB@1{HvoRg52=M*?)fENfDgelQmH1U&7y`h znJbXy*!%PNq^bn4OK+(NfaqW!0#ZV3P56<)u{NB%8$(wFKA*)nVFImExPjuX)jlu^ zB@gnZ(a28px2Yy#n^1d~97`H+-4{|1O#2Kq-bo$b9DG81b&-Pm4nL zy6ECDu*db}i7Jqs8BGvQ34^|ns7saI za+u?^Q=f8hE@Z8zHAkCun71VxulrpeXLb@~e|NR_igFP-(h7zt4%O;TgQhw~+^Yrh zic8*=dkYX;HjY>NhTM}jT#}7#_bR%!loqLA#FplAsmDey;9?2Kj)Lx&pw_x{nGR>$ z(!V?jFk1Lw2^ODRAE z6@CVJw+tLB_Ji##HC~f>`^!)JzKFghf2}^8M5Wvr$KJDnl=|Y3KE4QmiqV1PHV_Oo z+SfOrcDr2qYw(7sbkVWp$v1)6e#<8nZ=-2Zr+>T&HenZjWD6e^uqw>+K5MjT{#_fP zTA7Cge7$wnAcq$uIquK|NJN5DcO_!uZ^Sr8Y^+&v$#UV2+{Xz=k93M_XpU}Z=CWzm zvmu^NMFDLX2!;}5w>_x3AdFUu^&|fdF zyOx@@dLn_!K|eA_(AC9Go3*#{ppKv~kuZ(;Ly1Xo3|ttpry4gljjvtFqlC_GXt<`G z8k;vC<}=_(o<6PiXOFzsyLk362LREzVpFjk25gv-ML81u$$l*G=($r+xSdOPtVhT10@h4PZrW%}#_02hl!8`m$y+0AphlVG zoR$imLi~$U-PA1k5B651RT8|>1~mV};_Ki-1FcFb>8OmF)T*~%Z_2F8B>EqNnmVV< zqj2_L0MD`D&W4Una zA||PiX{X-%-nboP{`=ZDUyi=Jg@c@WllDuf>o<$IIp|z-WI!hdhh!@wTX{fm1|+MX z7@)a!iqke0{*)$Ix~}=4aKqtiGRt;12lEH}RkfA)J?+4HpR5AzUHXv@uy$3{fvz1f z-?Q^Xny2`>>FVSw1wpC28)=G4%^BNAp3TKEG`}AgQ=@$!+K%Pq!L+e6@lC0ZgsAo? z-?wS#fuViD_RRhXr2A{2ABrOh4&d(+4wv;d{F4%Y8m|+c6)xLB$?u;6QJszsjNUjJ z?B~o>D2DW^vy}lVCANt`YnN(8&=P{AD<$Q#rf>hsc<8S*b1`2!eiWWHyz1dS7stE{ zk`G$t+=r0lu$g;o8hhZxQjO3Bvd$A)PyD1{cmWEXeqe8_ z&_s=uETFNEt*jj=YRVZh9~+`dTi2~OnfzH%XbDJHHALj%NQS6XETS1K=gcXjomf%^ zUR~mBW~~xxOXAcd+U>@>b)HMVtpk-l1%C-jP7yvYvt8nwwAu>dQPC>c4tW(UJ|`cp z=T{!u)t!~63{{X(4sNKPu;lc!Gg5_@esuXaI=(c%Hm|GWIDd;;AXj+`*cEqZMwMuZ z!+3T7m^>ZGmmJcLQTq|ZWqp|Xq#0i3Jsb3$cX+kjEaEgH*K~%2>$ygSX?s@R{rZnq zFx}n8%9kKvTM=ll%KxpsFaKGQio{_^X}J5<7x=Uw!7Os$h8cZ{Je5E{3HeYjj@|`H zb6mzQCg~1LkkHq1Z2 zZ}pw-p6;}eKF}ZmfL0WxE`#C(=FA5~5>aFoeEs%`g3!WFJKU`M9LS}5JGJY#WAD28 z^haUn>3IRwp@O(sV^*Qwqq}V&`qu|Iko|$B7hF|S4N@7imrB~UMsMfiizl4W%!{8&PLd@L0{h)Zr62LYH146iN@sR&uR%26@ z50Nf^aCG)u@->?N|5hA6DCYktjy_PF{C|qG(g+l1#EN2`>o@i=aqp{L7*YCWq(K09 zj851U?mSK+-NU7io;84a`wh2=%E%`frp~I_9fGL@P~=Ws*rzx)q!*4m^p)|%0Y|F5 z0WTCsQG8$W;%iE}>r4XzUCLFn5^=_N8f3{~XG=hd>~<^|YAD&gq7(g}AmKL6>WHlVCPvTul7$?#mVVe@gB zzv=ApE2^QPHi}RW>lfucSIy%0rpi!km?j$B2%ewDl7V?L%H5XgYmV3sJobs1YxW z(%8td1lcDv%fgN0A_mz@a1QrP>)))5Pbt}TPe&*9M@xoJFh|ZYWu9#AqaVqnUf7h- z0!kr{)+UE^!4pwj&7P#G&a88%_T3c#NMPpe(5$pRpY%DmVcs73AoICz^U}ip&tsJg zC3yZtQBITAXaaBw0%XDjlyF4ocK^lpA(+<}6@ECRQaF zux+TWCEaNg1xM2PkhFgS)>8#iM0M;Yp;&9S-ROd}OQ6b&T8a26at*H3ni0WTDBj+h zF3o(Zn&))fd06cC!UrEZd?|+V+5j`+f2`WRR7hLX>K+18Ox==L6p6psNM?kdrsN(k zQGYfw5O7#n^2b*%t+2_|RWzaem|A3*)0P2i7B2EbXZx<|~f6vn(A=^PT zn4<@Pneh=$gb;M8h`8DIxF9#vnPpea^aYb%-bqs@)I#g>?6O!SyifxtfEmq3kesnI z!!}UWJCKsE$E2J_@P3#N8P_U)nZCDc62EJ*xnRP_cH!4|AE~+cXK^0S^51S54hZ}* z>w!ZGZSfD`%nO@gTJ5{DsGhtNV->~O|GX_-0>`6wuitP8<%MgcNcOQ0H{kiY9%{DX z6X}v*2TGk*GbgL`zJl|6BY4n`mA+xEh3_^g^~bZre7=>=3te{&e+b@2CjAg&yN{ETq>6hI!Jem@h62EEWHy={OoRI-+(`4|n=RPml=A**b z=n3lie;tdbHs6`+`;z17+&tvAM4+t5StKPK!EJC9&b}PmjHrzN@e$Ds@nfH|ssrr96hP@Fz73e7zWnI13lnDu4{7qAq@3 z(Dvwp-y;zn)QcPA{CCAvJKEXq9KGqK6e(})kreb1MHK8wL`1v8TF4T`C87CFwV&m< zO5$ro2mNhdRI@PZT51=){39i;%zvv2#%Bl|Pty5ZW1>mwwhh%E+xcb`Be?M(|B38& zNzdQfgC{9Abv~_5&y|$kEtdR#RWHyhC_K;y_=`yATfzxiPXQ*aO9NDwW<(aGAICkB zn#4yXy1ERtd9o_r&7j5#@%(!5%G&w?2E_fMMbPeK|1E#2IcBVBxEn-_YD(hwZ4z5$ zQc)ux3nAwU_nR7xZGmFzEY)WX<{He~rQ(^BI1hQK!E_vo$3QLg%aUi8e-x7SKbTjt z$Z!wtn7VVbH-VZO?=l>}y}zKUQZak~PR^nh>WtjpZNK*?ECil%2S;L_|6i6~r3OfE z^nQZ9je1;fAd6+C8IiX~0$K7B@i=Zb->}%_Mc1a%lm*cN<4-Z_#UA#TkAIVsL`Y&r z5p}~IjY#|l-tywMMyM=&G z5+!8Gtd&F?L9ij(;RnskKQ}csxA;X*?hpjx98;#9JFDUs zc@a&FuhDP*E#w)!iy*~1x*E*^Es<>S0Eu;1G_-B$Ve&huW8WX<^Vxeg>$R}+jpzeE zTImeFx$SZD?eTlIt}8t-%r};SK?cK{VXF6%&v0>U)?|a& zptnB3A4)hs&(6PX!pFqH%DEk19P|D7Nbgg3^nG2<{I8Kn;kLWLfD3)qfh7guy12|b z7Wr&MHc4pR0RO_)a)}qp#?BjMYujhb75wZ6`*r404xE-&4%=vAz;Zy4jjN!a0yP@zwDx?JR$%*~V5x`yh z0l<;10_duts@twtf85=F6@?}Eoin*?bFS-p?#c!1$*a!Hae|WDXBZ3-Wp_j5n3qSz z%to7aDME+UD~F#kVkYl5aGZ2`ngUrPvqq)-XR9iymXa>{vWu~g5|Oj9aTX1F)|(HC-# zdlk1+ycuI}{V~Pb1B0Vzk$oH^jqT8F)$>*ZEImkg#{3;ae!QNHUc94;TK<~(jI)SjoVnW zg~$bFH8X=(EbW$A$xef@lusGj)Va%yHjos8CV=a}ksSObLVL%qK|%hcIZ7m{?iE-5 zlWiBVLF^6Yox*P~f84noM<5tZ{z~eeV&bGur;eE1Gw;DX0^(vlk^_6dd*tY!l3zPi zvZUm(^gE7*Ep~+hJ7<-veeNFknF^$Q+dh{$@kV!BZ{RfFE&H;5cHnS7o*!h$NX6X7SN}EuqU4WGl6c9KACUHvlqbty<6)MY&t9+YQZO4 zcU^3(3wOQ1wEemLWD1xBuJp56e}Z?UBN+%-(t{Ro_T$?Ym(LjAldQ6&5tpSGAzbBu zbR#Twq04GT?CF(_*;7aB^-5*6i2~FdZo|sHd`O3Y5~Ba0;5cY-(%KTDw^G7q5k|f~ zr4e2s*zQs>^djwuppS8w675x-zuJdO8^z97d`?0;{>XlSP4esz%Yvp(| zI<8F)i(lu!@ne2qKj-C=!k5-MaY-0m^6kom(?R8>`znk+*-dUOpIzo-5`S)gKile= zbT|2R>Aa}7y&NNMyNd{)ZL@~pRDI)euQv95Tx>l?k95gEvY9kgg$*e`4e+wDcbu~b zi~Q#9*y{LcxB9!NrmOiA8sfuY00~Tx#^2RtL1OEqv`Rv%)QfXIS?8?M7uNY`!eo}N zt^iK2;CNTfrwRR$^Ic}q7s~hA;I|Z=S?P z8?R(9VjSu@xnSrh0!olRK?hD~jGwv_$vu%&hGbOf@=B-E$DG=*yvA=|R0Sv)POdYW zpV-q%Q}naydC=}b328r@O_E7`oqqSabOvB|n`}d3@!9!x3nd*$NY70W%xdNPXc7;aYPG5w7bpxn9qy>YTAFpp6 z>=$HAjaH*Q3FTNu$kKlDBYu1bAVE7TuO?cLt&Jl>hol}oejT7Um=F-9g7K|~~~!y)Z2AR$?xuU%!N~iZI zKdj8Wf7@IvN#vgtjpAMmmdk;YFRsQN6hepIe|G0b7|#rG$?N?eA!gKJT1LYsBehi2 z%V0V}gy&j5FyEMza}xV6WTR(F)Q)MdTjX|WuNyMa8inwVEox4F%GZgj?u0VGh2b0K zC24766)?ZW^{S1@cZ9jp<@aj>Dfn)4|7KPNUoOX*nGRFk@R)v$s&EhZlM3c%+s^UQ zo|Slm8iq)n<^5t|DQWv*&a97iDsbc7KXl5etx>aN#7bv zKHEEEhr?j&vXR$%YU7OWo7~*M9H0L{_DrHR3{)h#Htc%fL`#L%^L(uO{O^viaVdT2 zkeOl$*pPqBySP|8=kEOjWl}qSukYH;#s!BihuIyk-et-_0cT62_I@!Fe*$~PGvlz3 zV(mGgdWbx1ZDj&Ff*eTakV15dBZ;`gR!->TFn*yek^DAS-(c!}t`A{k$G2Y^p$@V? z|ASrD-wOwP*qdbd5|+jv-h*U0P)RnU#6pj5fKygF?_9DE;cJHmz|m0p8Flxg@@Yqp z?4iW(&!j3}XfiUsKh=z+v+EK7r&%YK=zgi16Dukj(wi!71%yAC&tAU$)- zluJl4{&SMJ+Go|JH-^1qL@ES%>h^6MKDesj%!qfi6N#NJyELV5EfQZTU7bxawN)aQ z0($P3ax9+er7ZBSeUtWNXopc9{s;213C(}_Ve3H2xa04~FIw>r5nSI05gy%IO?s3Z zU%+cvft~B3OhF*d#3FQ`)ttx`}T zF_kCOpNj1I>oWij_M!`_zeE#OS1J09HI`TLp$o+vzX4Vtyt-8v+hK(fr*T2`<&f~h z{3mp9G92f(y{l(@8$Wym<%=Vw7=e1N^gr7~`Q!Wcgfo|7F@7&3Co1{Cf^6N&0eMg^ zk=ANJWsV+&7j^d>M0a5FQUvVNwz_L! z`P*IOwa&$J5D~dGlqWX1efQxoSEzAkomo#lkKbU?O%C#-K;MA>mEorA_4|J^+|auK z6`TGIkK1}hJ%%X?vj!GQoJuQT1Fb?M= zIFw)!Q=~pR`z5S53R3vpg=2zlZuUut{E#9Yug4NlY*2pJZxNX{exVuDubfNjnv!pI zq&Nqk<4w*IK4q;RPiLo+GrIo=kcZEV$d;@pT0ZY(AV;D+o&qo=jO7--Dz8L#Z1JVq zH+9%5&};a%*~E54EsiEwzLZx%8lunjgqdcND&5CQ$eQ`07A2@5mD9_bpWHm|uCio0 zQ;uKOGUWSmUjhU`SqP-@EMRe(siana>#*Uh>O<}=D!@FS04w@E7LN4nApP!G)ciuW zE+O;v-Tr%{}V!Zf?`53&ml>^&x)j%9ji^opI{^N)h zq6rYH-nc=D79-7$`0#BNTe5y;S=`H-hFqK3BHJJI6`NPooZS^1HLj}D880L>G-=jH=oq~HnRT7Idi=z>S|ln z?`@k>Z!pbvso>ZSjuHE3i)<$X?&hkAwQ-s8eM`mRV>GpzgyDxy_M2H=jas~qg155S zKhE?Xy;0wRHYUSOkR&tz6fE~E+XN)Q=Ak6UxnWJ7opl3QZqSfc-J@eszrI#AUJ+*V z?jS@OP@sPqlvgJXKm`PVCyx4C5{veSN7EFS%EP}XcHuYsCueumf`4l{2|m8-DsR7V zWzpktCjIhFYsEQ`&=>uTrjkfuM%Gj{gUo`ZE4k9XSZ^xRVf9uoN)hQvovY8w+%s8A ztjT8q4mY{8i`~HAGy^Ncf z06RPZLNf0+l|TcBfTEt@g?|z*zJ_8PPXe5=T2#T9-^fh^)IG8&_$TEpRU{(-U+lvJ-kSSRgcJXtM-d!feLT+Jm$!KlKiK7Qc z?+qv)th80dcTylF5G6fBdg}H-1u~^6y<2~)bekAqI&b19a^&MB=cKQZ%zlqQUdE>- z?EmNln4;$kk;ys@h**WFnzhCvreHIGnR|pyFUtHiOsCMhV>P+GbkmS+aVWqvpid@S zH@o&ES8H4(%nUsVz{TSKg2gg^b}hDc(0(+`C5GXy@(=&Pru$0&gZ;8%q94(qDk1)q zIH4fB4J<7Kb>s1H3rkF}u)sRt;#)=R^C4;MsMSaL(=S5~nT-E~F{J)MNaRD3eOvW; z24dh;ZprL_T4MX3mMBm_5U9+N059o)2IZ)JpO2%iX?PB@ZV&$VXg-}qWe>h}=?gvT z4IpUlqot;@(T;giT#0n0i7DiP>OTR@XxiWtuK!@t8*2#4|9phHQ8jG?8xJsqM+VHKFi7>eDyc`fBGdR;%C%Eli#6LZ3;p zvFd|-mhoG&cT&9tl8ba_Xzy#jmnx`p=gWX3y@XL&D+^-NrzU0#tP)H(eK zJJAKd_=}h9Y*Az_+w5vD88-RFJ9`j z$`lF6L_KxZA$z}RQ?R2c(Sbg1do;t=RZOa%LFwB?% z(2MiNi;nAu7BhP;Mv5@3{T)%Ae|_k^_Gj$AU`QBDbXEL}SwRtd`(9E`06Hq=T~3H+ z#`p+hr9&k5xzV(Y*9kTR820Z^r2G&I?1Wu@@9TMZXQtt7;QT1V7b$>%%*>Q$SY!tih^beONxFZ0a@7dzB=HWsiZ;o&Z@Sz?P$N@pnRvo&FT6PV}&_(j9}s*{<| z8<-#NT71(IZyI4LlssrJ(o2BrUBC6CX1fQ*j3*}93o7oHoV!tabUg?5|9%><|Gzv8 zd9ffwu8f7)Yg=mq*NM9tFma*POmpcxZ`XSaVyig@zxiSG^2M}QALWJeOZXG_{zbhx zl2WwaJdfHn-MSu9e6MI3pgA+)9}dEPodr(mMU!QW`iBUM0w%qKO{04O>VEgYvZ9=q~A;Z`G z0#(^Gqt(m@jcYmw1q>-RcWupCqXzUI7TuB_0YKcFrsp0O8HSHlrFb!XFxnl21q=|f z8B$DG8hY9IVGIo(e@0kw7q9Ku7+dMR286i>I{Gd0rb53F?)%*O@~ln(-SCL^Kp(#c z$LMqPw0cP-z=oCymDm>p^sj?#IWQ*PNBkcigx`$)y7qc)@?XaVuxDH+#BC}#ALm?P zYk?_){%^uD-qU^RFE}5_F~Eo!J#wOP$lU*tpm9DgF7g;x>+_(0{laA>o-cvILnhAf z)XFd~qO6#G(nlNCsJ-hWFl})zCgI@Lv}MUjQX1p6@E{}sCj#{s5~GS z_dL@e(`ui!7lzyH;t}QmLk^iyR@hL4LWHhugy!}B?i8ZlWf*ONRsI8qs#K)D98*b_ zE|zDO_1fw7TSUf2>b8NELBz+3s1ag?O#UO8{4<#re?2Q628Y}a&6?vZ`wX|EN60a~ zc-XI=)L=Kf;-nFA(n^`QI$(oxDP#SSF&Zf~%ky-ZY$F>rs7?wT7pX2@f8bNkV0PqO z4lHGHX7@ISknH%B!&@(&^#V1n-Q6LD2&84aWulj(^0_L3+zKf7`uF>uA-|h!S>sA7 zOiDVt&!|j$o@|V;an7(qsvUzHayd0*u4M$i&KOvI(T;fzfL{#Yq?kyHmGi^^wBTl0v1lR8 z!|P|SFGbOObAHflCkH>@{k%UQo%7RAIsMm}e#`;Pp@phwpJ>nN`W)77^v zRo)AK?7{pzSqS0}%x5@tV+_SkAPsbc4vKHlHs6gmpU(DiIV~?saZI+VuVv? zc&4uFTH&tPTNCa2)3%R)^^`Y&8#E?@lA$ZTukcrQx;wp2@VN2bv5Mil1q+EbxW}rq zrC2cbQ10$CuCgKIlN6tg=ea1{Ung*|+~>n~Mf>-I!qBK3V}b$}u}rS}m(gNG?;F%kbp zXRKyh%~H^6)dnP(;rCwUzobMp94wD(waV1j<(NkCea~Qct)i0hQtG23`wrc;(ICWQ z%j$y1LjR`WaSf#_R;IT-*@@jlx2M5xW(h^7LwvjTJ~%{ zFHBuqbeU74Bc-wyWaQj*X2Rp zyf@d9+d)lL1A=Q%ekjoJ=of!PEL#$TMT2LaJoQ2(J_mL~hqgX5%PsddQUuOC`ZhW) zL5^YikDQdyUpo_+M);xEi#-#57!UQ$#lmHNK!7auJ7zI61pzMEyB-a^1$V1v8H}^) zTJhU586!3?mIMca`^)Z|Z(FAsqKf%#8KroW_luZdB60;ReL6)RQQ+%UogR(GzDe-o zZV$x8ODoCXwzVXO;OM}`1UVQoIAs&u)UcXnZNhI0>&oxVY$ZvI+ZnP*B2?c3)d%8= zRq)#~yuSXsSV{)8Qna>L;_0OgzVp)KyO_WZ`eO4~E=clN2klJk9=!D!1Pz((#44md z1H)2JU4(~S(ME$yMXoi_Kml{_RjRGfwmKLfC$>T%cLvEMT-R*7zsa5H6rAo}>tQwJh*sjyzSX*`vc@as{x%SzRC^BQeKB zhu_YSi5n|&h1wntdgPBp_kux6d1)6ssIfC-4TxEMo|FSbxc9OsSQ=cgv7T-o^NUe}x1uemr6ExXDAM9E@#$1J9z$~&QEh=`YE^-A7eFNr&jNqOQBZo`= z&##AoS%h^zT%v#YC+xo#z%cT>K?`yPOxKG*3!vdMv)aX=1@O&*(7~I&0q{}68F|nG zxaj?3$`%G%0J6frwk7L~$o2nwoTF!%LIxT7*}w={j51|gU?6*)feojwwFf#h)xNHK z`Pui$Dakh-U})kIrSy?%#_M7{*Xke^&S!h;z1AAa`6dAk8iA6{CwwWtQg3 zqu=1y{bZR!xv#99VK*BOgfmmaz#I}4t=u1n|1g0m>-^Kv=|5_A@iOi$@83p>=Xncx z$^y}u)QrU!zyp;Ih^5IF?i^YHjcQmxG7d;8Jo#6@X@mJmA6rzu&jdBf5dTm8ODayPo(`M_606u+-S!Q8 zUYB+zxsDF*7h4$oHecuV*+;bpbk`h>dCrsuevouuZax^?u6#CflM&lQ Define data, use random, -##-- or do help(data=index) for the standard data sets. - -## The function is currently defined as -function(evalRoot) { - fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); - fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); - fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); - fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); - fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); - fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); - fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); - fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); - fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); - fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); - fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); - fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); - fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); - fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); - - eval = list( - AlleleCountStats = NA, - CompOverlap = NA, - CountVariants = NA, - GenotypeConcordance = NA, - MetricsByAc = NA, - MetricsBySample = NA, - Quality_Metrics_by_allele_count = NA, - QualityScoreHistogram = NA, - SampleStatistics = NA, - SampleSummaryStatistics = NA, - SimpleMetricsBySample = NA, - TiTv = NA, - TiTvStats = NA, - Variant_Quality_Score = NA, - - CallsetNames = c(), - CallsetOnlyNames = c(), - CallsetFilteredNames = c() - ); - - eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats); - eval$CompOverlap = .attemptToLoadFile(fileCompOverlap); - eval$CountVariants = .attemptToLoadFile(fileCountVariants); - eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance); - eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc); - eval$MetricsBySample = .attemptToLoadFile(fileMetricsBySample); - eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count); - eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram); - eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics); - eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics); - eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample); - eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); - eval$TiTvStats = .attemptToLoadFile(fileTiTvStats); - eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score); - - uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); - eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); - eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); - eval$CallsetFilteredNames = as.vector(c()); - eval; - } -} -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory. -\keyword{ ~kwd1 } -\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd deleted file mode 100644 index 67c2c7b28..000000000 --- a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd +++ /dev/null @@ -1,55 +0,0 @@ -\name{gsa.read.gatkreport} -\alias{gsa.read.gatkreport} -\title{ -gsa.read.gatkreport -} -\description{ -Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list. -} -\usage{ -gsa.read.gatkreport(filename) -} -\arguments{ - \item{filename}{ -The path to the GATKReport file. -} -} -\details{ -The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accomodates multiple tables and is still R-loadable - through this function. - -The file format looks like this: -\preformatted{##:GATKReport.v0.1 TableName : The description of the table -col1 col2 col3 -0 0.007451835696110506 25.474613284804366 -1 0.002362777171937477 29.844949954504095 -2 9.087604507451836E-4 32.87590975254731 -3 5.452562704471102E-4 34.498999090081895 -4 9.087604507451836E-4 35.14831665150137 -} - -} -\value{ -Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.v1", "TableName.v2", ..., "TableName.vN". -%% ~Describe the value returned -%% If it is a LIST, use -%% \item{comp1 }{Description of 'comp1'} -%% \item{comp2 }{Description of 'comp2'} -%% ... -} -\references{ -%% ~put references to the literature/web site here ~ -} -\author{ -Kiran Garimella -} -\note{ -%% ~~further notes~~ -} - -\seealso{ -%% ~~objects to See Also as \code{\link{help}}, ~~~ -} -\examples{ -report = gsa.read.gatkreport("/path/to/my/output.gatkreport"); -} -\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd deleted file mode 100644 index 0a8b37843..000000000 --- a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd +++ /dev/null @@ -1,48 +0,0 @@ -\name{gsa.read.squidmetrics} -\alias{gsa.read.squidmetrics} -\title{ -gsa.read.squidmetrics -} -\description{ -Reads metrics for a specified SQUID project into a dataframe. -} -\usage{ -gsa.read.squidmetrics("C315") -} -\arguments{ - \item{project}{ -The project for which metrics should be obtained. -} - \item{bylane}{ -If TRUE, obtains per-lane metrics rather than the default per-sample metrics. -} -} -\details{ -%% ~~ If necessary, more details than the description above ~~ -} -\value{ -%% ~Describe the value returned -%% If it is a LIST, use -%% \item{comp1 }{Description of 'comp1'} -%% \item{comp2 }{Description of 'comp2'} -%% ... -Returns a data frame with samples (or lanes) as the row and the metric as the column. -} -\references{ -%% ~put references to the literature/web site here ~ -} -\author{ -Kiran Garimella -} -\note{ -This method will only work within the Broad Institute internal network. -} - -\seealso{ -%% ~~objects to See Also as \code{\link{help}}, ~~~ -} -\examples{ -## Obtain metrics for project C315. -d = gsa.read.squidmetrics("C315"); -} -\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/gsalib/man/gsa.read.vcf.Rd deleted file mode 100644 index cffd35e8f..000000000 --- a/public/R/src/gsalib/man/gsa.read.vcf.Rd +++ /dev/null @@ -1,53 +0,0 @@ -\name{gsa.read.vcf} -\alias{gsa.read.vcf} -\title{ -gsa.read.vcf -} -\description{ -Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field. -} -\usage{ -gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) -} -\arguments{ - \item{vcffile}{ -The path to the vcf file. -} - \item{skip}{ -The number of lines of the data file to skip before beginning to read data. -} - \item{nrows}{ -The maximum number of rows to read in. Negative and other invalid values are ignored. -} - \item{expandGenotypeFields}{ -If TRUE, adds an additional column per sample containing just the genotype. -} -} -\details{ -The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis. -} -\value{ -Returns a data.frame object, where each column corresponds to the columns in the VCF file. -%% ~Describe the value returned -%% If it is a LIST, use -%% \item{comp1 }{Description of 'comp1'} -%% \item{comp2 }{Description of 'comp2'} -%% ... -} -\references{ -%% ~put references to the literature/web site here ~ -} -\author{ -Kiran Garimella -} -\note{ -%% ~~further notes~~ -} - -\seealso{ -%% ~~objects to See Also as \code{\link{help}}, ~~~ -} -\examples{ -vcf = gsa.read.vcf("/path/to/my/output.vcf"); -} -\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/gsalib/man/gsa.warn.Rd deleted file mode 100644 index 0b9770b5c..000000000 --- a/public/R/src/gsalib/man/gsa.warn.Rd +++ /dev/null @@ -1,46 +0,0 @@ -\name{gsa.warn} -\alias{gsa.warn} -\title{ -GSA warn -} -\description{ -Write a warning message to standard out with the prefix '[gsalib] Warning:'. -} -\usage{ -gsa.warn(message) -} -%- maybe also 'usage' for other objects documented here. -\arguments{ - \item{message}{ -The warning message to write. -} -} -\details{ -%% ~~ If necessary, more details than the description above ~~ -} -\value{ -%% ~Describe the value returned -%% If it is a LIST, use -%% \item{comp1 }{Description of 'comp1'} -%% \item{comp2 }{Description of 'comp2'} -%% ... -} -\references{ -%% ~put references to the literature/web site here ~ -} -\author{ -Kiran Garimella -} -\note{ -%% ~~further notes~~ -} - -\seealso{ -%% ~~objects to See Also as \code{\link{help}}, ~~~ -} -\examples{ -## Write message to stdout -gsa.warn("This is a warning message"); -} -\keyword{ ~kwd1 } -\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/gsalib/man/gsalib-package.Rd deleted file mode 100644 index 2b8d6db9f..000000000 --- a/public/R/src/gsalib/man/gsalib-package.Rd +++ /dev/null @@ -1,68 +0,0 @@ -\name{gsalib-package} -\alias{gsalib-package} -\alias{gsalib} -\docType{package} -\title{ -GATK utility analysis functions -} -\description{ -Utility functions for analyzing GATK-processed NGS data -} -\details{ -This package contains functions for working with GATK-processed NGS data. These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more. -} -\author{ -Genome Sequencing and Analysis Group - -Medical and Population Genetics Program - -Maintainer: Kiran Garimella -} -\references{ -GSA wiki page: http://www.broadinstitute.org/gsa/wiki - -GATK help forum: http://www.getsatisfaction.com/gsa -} -\examples{ -## get script arguments in interactive and non-interactive mode -cmdargs = gsa.getargs( list( - requiredArg1 = list( - value = NA, - doc = "Documentation for requiredArg1" - ), - - optionalArg1 = list( - value = 3e9, - doc = "Documentation for optionalArg1" - ) -) ); - -## plot a proportional Venn diagram -gsa.plot.venn(500, 250, 0, 100); - -## read a GATKReport file -report = gsa.gatk.report("/path/to/my/output.gatkreport"); - -## emit a message -gsa.message("This is a message"); - -## emit a warning message -gsa.message("This is a warning message"); - -## emit an error message -gsa.message("This is an error message"); - -## read the SQUID metrics for a given sequencing project (internal to the Broad only) -s = gsa.read.squidmetrics("C427"); - -## read command-line arguments -cmdargs = gsa.getargs( - list( - file = list(value="/my/test.vcf", doc="VCF file"), - verbose = list(value=0, doc="If 1, set verbose mode"), - test2 = list(value=2.3e9, doc="Another argument that does stuff") - ), - doc="My test program" -); -} -\keyword{ package } From ca35defdcd4852f367a9f62655d4440d9a1a83ef Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Wed, 27 Jul 2011 12:29:43 -0400 Subject: [PATCH 044/186] Moved gsalib sources from private/ to public/ --- public/R/src/gsalib/DESCRIPTION | 10 ++ public/R/src/gsalib/R/gsa.error.R | 12 ++ public/R/src/gsalib/R/gsa.getargs.R | 116 ++++++++++++++++++ public/R/src/gsalib/R/gsa.message.R | 3 + public/R/src/gsalib/R/gsa.plot.venn.R | 50 ++++++++ public/R/src/gsalib/R/gsa.read.eval.R | 83 +++++++++++++ public/R/src/gsalib/R/gsa.read.gatkreport.R | 64 ++++++++++ public/R/src/gsalib/R/gsa.read.squidmetrics.R | 28 +++++ public/R/src/gsalib/R/gsa.read.vcf.R | 23 ++++ public/R/src/gsalib/R/gsa.warn.R | 3 + public/R/src/gsalib/Read-and-delete-me | 9 ++ public/R/src/gsalib/data/tearsheetdrop.jpg | Bin 0 -> 50343 bytes public/R/src/gsalib/man/gsa.error.Rd | 49 ++++++++ public/R/src/gsalib/man/gsa.getargs.Rd | 57 +++++++++ public/R/src/gsalib/man/gsa.message.Rd | 44 +++++++ public/R/src/gsalib/man/gsa.plot.venn.Rd | 75 +++++++++++ public/R/src/gsalib/man/gsa.read.eval.Rd | 111 +++++++++++++++++ .../R/src/gsalib/man/gsa.read.gatkreport.Rd | 55 +++++++++ .../R/src/gsalib/man/gsa.read.squidmetrics.Rd | 48 ++++++++ public/R/src/gsalib/man/gsa.read.vcf.Rd | 53 ++++++++ public/R/src/gsalib/man/gsa.warn.Rd | 46 +++++++ public/R/src/gsalib/man/gsalib-package.Rd | 68 ++++++++++ 22 files changed, 1007 insertions(+) create mode 100644 public/R/src/gsalib/DESCRIPTION create mode 100644 public/R/src/gsalib/R/gsa.error.R create mode 100644 public/R/src/gsalib/R/gsa.getargs.R create mode 100644 public/R/src/gsalib/R/gsa.message.R create mode 100644 public/R/src/gsalib/R/gsa.plot.venn.R create mode 100644 public/R/src/gsalib/R/gsa.read.eval.R create mode 100644 public/R/src/gsalib/R/gsa.read.gatkreport.R create mode 100644 public/R/src/gsalib/R/gsa.read.squidmetrics.R create mode 100644 public/R/src/gsalib/R/gsa.read.vcf.R create mode 100644 public/R/src/gsalib/R/gsa.warn.R create mode 100644 public/R/src/gsalib/Read-and-delete-me create mode 100755 public/R/src/gsalib/data/tearsheetdrop.jpg create mode 100644 public/R/src/gsalib/man/gsa.error.Rd create mode 100644 public/R/src/gsalib/man/gsa.getargs.Rd create mode 100644 public/R/src/gsalib/man/gsa.message.Rd create mode 100644 public/R/src/gsalib/man/gsa.plot.venn.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.eval.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.gatkreport.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.squidmetrics.Rd create mode 100644 public/R/src/gsalib/man/gsa.read.vcf.Rd create mode 100644 public/R/src/gsalib/man/gsa.warn.Rd create mode 100644 public/R/src/gsalib/man/gsalib-package.Rd diff --git a/public/R/src/gsalib/DESCRIPTION b/public/R/src/gsalib/DESCRIPTION new file mode 100644 index 000000000..6116e8c66 --- /dev/null +++ b/public/R/src/gsalib/DESCRIPTION @@ -0,0 +1,10 @@ +Package: gsalib +Type: Package +Title: Utility functions +Version: 1.0 +Date: 2010-10-02 +Author: Kiran Garimella +Maintainer: Kiran Garimella +Description: Utility functions for GATK NGS analyses +License: BSD +LazyLoad: yes diff --git a/public/R/src/gsalib/R/gsa.error.R b/public/R/src/gsalib/R/gsa.error.R new file mode 100644 index 000000000..1c6a56046 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.error.R @@ -0,0 +1,12 @@ +gsa.error <- function(message) { + message(""); + gsa.message("Error: **********"); + gsa.message(sprintf("Error: %s", message)); + gsa.message("Error: **********"); + message(""); + + traceback(); + + message(""); + stop(message, call. = FALSE); +} diff --git a/public/R/src/gsalib/R/gsa.getargs.R b/public/R/src/gsalib/R/gsa.getargs.R new file mode 100644 index 000000000..94613bf93 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.getargs.R @@ -0,0 +1,116 @@ +.gsa.getargs.usage <- function(argspec, doc) { + cargs = commandArgs(); + + usage = "Usage:"; + + fileIndex = grep("--file=", cargs); + if (length(fileIndex) > 0) { + progname = gsub("--file=", "", cargs[fileIndex[1]]); + + usage = sprintf("Usage: Rscript %s [arguments]", progname); + + if (!is.na(doc)) { + message(sprintf("%s: %s\n", progname, doc)); + } + } + + message(usage); + + for (argname in names(argspec)) { + key = argname; + defaultValue = 0; + doc = ""; + + if (is.list(argspec[[argname]])) { + defaultValue = argspec[[argname]]$value; + doc = argspec[[argname]]$doc; + } + + message(sprintf(" -%-10s\t[default: %s]\t%s", key, defaultValue, doc)); + } + + message(""); + + stop(call. = FALSE); +} + +gsa.getargs <- function(argspec, doc = NA) { + argsenv = new.env(); + + for (argname in names(argspec)) { + value = 0; + if (is.list(argspec[[argname]])) { + value = argspec[[argname]]$value; + } else { + value = argspec[[argname]]; + } + + assign(argname, value, envir=argsenv); + } + + if (interactive()) { + for (argname in names(argspec)) { + value = get(argname, envir=argsenv); + + if (is.na(value) | is.null(value)) { + if (exists("cmdargs")) { + assign(argname, cmdargs[[argname]], envir=argsenv); + } else { + assign(argname, readline(sprintf("Please enter a value for '%s': ", argname)), envir=argsenv); + } + } else { + assign(argname, value, envir=argsenv); + } + } + } else { + cargs = commandArgs(TRUE); + + if (length(cargs) == 0) { + .gsa.getargs.usage(argspec, doc); + } + + for (i in 1:length(cargs)) { + if (length(grep("^-", cargs[i], ignore.case=TRUE)) > 0) { + key = gsub("-", "", cargs[i]); + value = cargs[i+1]; + + if (key == "h" | key == "help") { + .gsa.getargs.usage(argspec, doc); + } + + if (length(grep("^[\\d\\.e\\+\\-]+$", value, perl=TRUE, ignore.case=TRUE)) > 0) { + value = as.numeric(value); + } + + assign(key, value, envir=argsenv); + } + } + } + + args = as.list(argsenv); + + isMissingArgs = 0; + missingArgs = c(); + + for (arg in names(argspec)) { + if (is.na(args[[arg]]) | is.null(args[[arg]])) { + gsa.warn(sprintf("Value for required argument '-%s' was not specified", arg)); + + isMissingArgs = 1; + missingArgs = c(missingArgs, arg); + } + } + + if (isMissingArgs) { + gsa.error( + paste( + "Missing required arguments: -", + paste(missingArgs, collapse=" -"), + ". Specify -h or -help to this script for a list of available arguments.", + sep="" + ) + ); + } + + args; +} diff --git a/public/R/src/gsalib/R/gsa.message.R b/public/R/src/gsalib/R/gsa.message.R new file mode 100644 index 000000000..a2b909d3d --- /dev/null +++ b/public/R/src/gsalib/R/gsa.message.R @@ -0,0 +1,3 @@ +gsa.message <- function(message) { + message(sprintf("[gsalib] %s", message)); +} diff --git a/public/R/src/gsalib/R/gsa.plot.venn.R b/public/R/src/gsalib/R/gsa.plot.venn.R new file mode 100644 index 000000000..b1353ccc1 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.plot.venn.R @@ -0,0 +1,50 @@ +gsa.plot.venn <- +function(a, b, c=0, a_and_b, a_and_c=0, b_and_c=0, + col=c("#FF6342", "#63C6DE", "#ADDE63"), + pos=c(0.20, 0.20, 0.80, 0.82), + debug=0 + ) { + library(png); + library(graphics); + + # Set up properties + for (i in 1:length(col)) { + rgbcol = col2rgb(col[i]); + col[i] = sprintf("%02X%02X%02X", rgbcol[1], rgbcol[2], rgbcol[3]); + } + + chco = paste(col[1], col[2], col[3], sep=","); + chd = paste(a, b, c, a_and_b, a_and_c, b_and_c, sep=","); + + props = c( + 'cht=v', + 'chs=525x525', + 'chds=0,10000000000', + paste('chco=', chco, sep=""), + paste('chd=t:', chd, sep="") + ); + proplist = paste(props[1], props[2], props[3], props[4], props[5], sep='&'); + + # Get the venn diagram (as a temporary file) + filename = tempfile("venn"); + cmd = paste("wget -O ", filename, " 'http://chart.apis.google.com/chart?", proplist, "' > /dev/null 2>&1", sep=""); + + if (debug == 1) { + print(cmd); + } + system(cmd); + + # Render the temp png file into a plotting frame + a = readPNG(filename); + + plot(0, 0, type="n", xaxt="n", yaxt="n", bty="n", xlim=c(0, 1), ylim=c(0, 1), xlab="", ylab=""); + if (c == 0 || a >= b) { + rasterImage(a, pos[1], pos[2], pos[3], pos[4]); + } else { + rasterImage(a, 0.37+pos[1], 0.37+pos[2], 0.37+pos[3], 0.37+pos[4], angle=180); + } + + # Clean up! + unlink(filename); +} + diff --git a/public/R/src/gsalib/R/gsa.read.eval.R b/public/R/src/gsalib/R/gsa.read.eval.R new file mode 100644 index 000000000..f1d49092b --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.eval.R @@ -0,0 +1,83 @@ +.gsa.attemptToLoadFile <- function(filename) { + file = NA; + + if (file.exists(filename) & file.info(filename)$size > 500) { + file = read.csv(filename, header=TRUE, comment.char="#"); + } + + file; +} + +gsa.read.eval <- +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .gsa.attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .gsa.attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .gsa.attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .gsa.attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .gsa.attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = .gsa.attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .gsa.attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .gsa.attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .gsa.attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .gsa.attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .gsa.attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .gsa.attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .gsa.attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .gsa.attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c( + paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), sep=""), + paste(gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In\\U\\1", eval$CallsetNames[1], perl=TRUE), sep="")) + ); + + if (!(eval$CallsetFilteredNames[1] %in% unique(eval$TiTv$jexl_expression))) { + eval$CallsetFilteredNames[1] = paste("In", eval$CallsetNames[1], "-FilteredIn", eval$CallsetNames[2], sep=""); + } + + if (!(eval$CallsetFilteredNames[2] %in% unique(eval$TiTv$jexl_expression))) { + eval$CallsetFilteredNames[2] = paste("In", eval$CallsetNames[2], "-FilteredIn", eval$CallsetNames[1], sep=""); + #eval$CallsetFilteredNames[2] = paste(gsub("^(\\w)", "In", eval$CallsetNames[2], perl=TRUE), "-Filtered", gsub("^(\\w)", "In", eval$CallsetNames[1], perl=TRUE), sep=""); + } + + eval; +} + diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R new file mode 100644 index 000000000..9b3ef1ad1 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R @@ -0,0 +1,64 @@ +# Load a table into the specified environment. Make sure that each new table gets a unique name (this allows one to cat a bunch of tables with the same name together and load them into R without each table overwriting the last. +.gsa.assignGATKTableToEnvironment <- function(tableName, tableHeader, tableRows, tableEnv) { + d = data.frame(tableRows, row.names=NULL, stringsAsFactors=FALSE); + colnames(d) = tableHeader; + + for (i in 1:ncol(d)) { + v = suppressWarnings(as.numeric(d[,i])); + + if (length(na.omit(as.numeric(v))) == length(d[,i])) { + d[,i] = v; + } + } + + usedNames = ls(envir=tableEnv, pattern=tableName); + + if (length(usedNames) > 0) { + tableName = paste(tableName, ".", length(usedNames), sep=""); + } + + assign(tableName, d, envir=tableEnv); +} + +# Load all GATKReport tables from a file +gsa.read.gatkreport <- function(filename) { + con = file(filename, "r", blocking = TRUE); + lines = readLines(con); + close(con); + + tableEnv = new.env(); + + tableName = NA; + tableHeader = c(); + tableRows = c(); + + for (line in lines) { + if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { + headerFields = unlist(strsplit(line, "[[:space:]]+")); + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + tableName = headerFields[2]; + tableHeader = c(); + tableRows = c(); + } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { + # do nothing + } else if (!is.na(tableName)) { + row = unlist(strsplit(line, "[[:space:]]+")); + + if (length(tableHeader) == 0) { + tableHeader = row; + } else { + tableRows = rbind(tableRows, row); + } + } + } + + if (!is.na(tableName)) { + .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); + } + + gatkreport = as.list(tableEnv); +} diff --git a/public/R/src/gsalib/R/gsa.read.squidmetrics.R b/public/R/src/gsalib/R/gsa.read.squidmetrics.R new file mode 100644 index 000000000..39fa1ad32 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.squidmetrics.R @@ -0,0 +1,28 @@ +gsa.read.squidmetrics = function(project, bylane = FALSE) { + suppressMessages(library(ROracle)); + + drv = dbDriver("Oracle"); + con = dbConnect(drv, "REPORTING/REPORTING@ora01:1521/SEQPROD"); + + if (bylane) { + statement = paste("SELECT * FROM ILLUMINA_PICARD_METRICS WHERE \"Project\" = '", project, "'", sep=""); + print(statement); + + rs = dbSendQuery(con, statement = statement); + d = fetch(rs, n=-1); + dbHasCompleted(rs); + dbClearResult(rs); + } else { + statement = paste("SELECT * FROM ILLUMINA_SAMPLE_STATUS_AGG WHERE \"Project\" = '", project, "'", sep=""); + print(statement); + + rs = dbSendQuery(con, statement = statement); + d = fetch(rs, n=-1); + dbHasCompleted(rs); + dbClearResult(rs); + } + + oraCloseDriver(drv); + + subset(d, Project == project); +} diff --git a/public/R/src/gsalib/R/gsa.read.vcf.R b/public/R/src/gsalib/R/gsa.read.vcf.R new file mode 100644 index 000000000..5beb6455d --- /dev/null +++ b/public/R/src/gsalib/R/gsa.read.vcf.R @@ -0,0 +1,23 @@ +gsa.read.vcf <- function(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) { + headers = readLines(vcffile, n=100); + headerline = headers[grep("#CHROM", headers)]; + header = unlist(strsplit(gsub("#", "", headerline), "\t")) + + d = read.table(vcffile, header=FALSE, skip=skip, nrows=nrows, stringsAsFactors=FALSE); + colnames(d) = header; + + if (expandGenotypeFields) { + columns = ncol(d); + + offset = columns + 1; + for (sampleIndex in 10:columns) { + gt = unlist(lapply(strsplit(d[,sampleIndex], ":"), function(x) x[1])); + d[,offset] = gt; + colnames(d)[offset] = sprintf("%s.GT", colnames(d)[sampleIndex]); + + offset = offset + 1; + } + } + + return(d); +} diff --git a/public/R/src/gsalib/R/gsa.warn.R b/public/R/src/gsalib/R/gsa.warn.R new file mode 100644 index 000000000..7ee08ce65 --- /dev/null +++ b/public/R/src/gsalib/R/gsa.warn.R @@ -0,0 +1,3 @@ +gsa.warn <- function(message) { + gsa.message(sprintf("Warning: %s", message)); +} diff --git a/public/R/src/gsalib/Read-and-delete-me b/public/R/src/gsalib/Read-and-delete-me new file mode 100644 index 000000000..d04323a6e --- /dev/null +++ b/public/R/src/gsalib/Read-and-delete-me @@ -0,0 +1,9 @@ +* Edit the help file skeletons in 'man', possibly combining help files + for multiple functions. +* Put any C/C++/Fortran code in 'src'. +* If you have compiled code, add a .First.lib() function in 'R' to load + the shared library. +* Run R CMD build to build the package tarball. +* Run R CMD check to check the package tarball. + +Read "Writing R Extensions" for more information. diff --git a/public/R/src/gsalib/data/tearsheetdrop.jpg b/public/R/src/gsalib/data/tearsheetdrop.jpg new file mode 100755 index 0000000000000000000000000000000000000000..c9d480fa05f4acf066e3bf1cf469db47b8a1afc3 GIT binary patch literal 50343 zcmdSAcT`kOw>Nmm8I+tsL86kgK!Zw_tbjx%2gx}$K|r!(1q2k33@SP2B$65=$3{eQ zw?NZC(|pJ0eeb>Bnl*RLH+RiHQ_bn~qq@@GRl92MT~*if*XzIyO;rt500##LPzOK2 zH5^Y*F_Vy{1;pQ3lIOrel`w&^Ui}<#Ld>p4FGV7 zK`diy=WPRGB@o-&dfT{y_-{FyuCDH2`M4(_=Ckqe0`uarf|&a+U%)pQ+||JBSTH{A zzv5ni?PvhG!1%w#@n``6H5&jl;rv&+WBPx1g+u?%+g@DkpW<-Hy#Rm!eSLilrX^|x zfGe-->&y4o*H^`09SZ=^?P=rd<@ZlokPw_P0Qi6U|6er!t8@UE0%-ulKYjFnkN?G& ze^QA6Kv6dUJP8MY$Kd-N_~vB*fSbVppa+2QN&v37_Joyg{JOWgJ$KZz(Fa*Q^ z2|xsp1;D2LtAD{%z>O*ZD3lSARCzimN)`S$=6?@9b^-tJ?V-CzpqG<_qYtMtIJ)ib za%#BQ2;Jor6A={!t~Y>il>jGu0MOP31i;+H009m)C^>(*4e}d@8I+@cab6RL<+(O*{NI_6oi1!~Dc=b2!^%C$9z{SD&`}gmF zM}YS?5aQ$G5fBj)5fKp*5)u)Ukq{A+5)%@VP>_(4k&%;=6Om9*Qjk-EknC?Ef3xG^ z{>@83ModUd_WwFucLO(xfmZ~DcsREK+?zOfH*v1}0S=IQ0&oWUI|=KhuHnp;|X`}zk4hlWSK!KP<^ z%+Ad(EN;Lze{TKS{=Kt{{BwMAiaJA|U;L#D2f+ImS^r7d|AnrbAYHg%AK(-Hr3(kw zAAIm`;uG8wC8So?Cwk#Yb6YHenD#;Hhc7)OT;c`@I%}^fQhIKQjXTJ{r2RwL{~2MC z|0Bx&ld%7yYY`yF!vQ-F?y%^ zC12}S58UFGI^;7<@420k3QLY+`wa{Rb{ENT$x}JYozgawgv%0!->88T$Y#(azEP2& zDBaU)!cQ>;h?jEk97SSvoytMP-cBv#%>q&Bs4e3t-_ivTmOe*IAF_J=hnD?x##Q01}Dmn$0^iu=1<*v%)mh z(BA-;oBGMlW20{Fi|Ika0E`pyL`akpj%$kyE$_I3(-T%^<*W%>K-{JpIAHANhVhTE z&)nJy_ASIAxz*(PATOVUwm3P%hw(^>P<)(#sfTGjSbwgpr28?>umlk8e}n&fVV-Ry zf1xYH)S{U?XcACeo@7zrTd99O?N1r3hnqe;#`1N(_$A$h;c5m&zkeaYK@KHjFw9G1 z$eB{}SNM%YX^A41&rVXCIXK(0drHKLFY-=GSqo|3F18$Ls@pOBoCGQ~)=x`bMC}vY z!b_a`tcOdjt8-HQC%-gaPlR9Eb?cCGEKT^VB@lq?*v~I-Voy`{!AKFw`4gSdtsK~ygJdyskP>O$Eo*RTzBAfBq(`v(` zwb_D)OV!SX(>RffYjN4N=CDPv`s1d55(QV^pVpD0Gt=)Pn;L+$Qd+>3)1i@%Nbmus zXixd{q>wQBZodzXQ_h%1= z(bVhV<}<97@Uny+$E!Z~)Y?kE2226HmL2Wq-aUJoH07lpa-@LMgQ0311$2=t0FO#v z3vm)nQ)G}&yCOv1DbjRalClIQtEP>_JcPpeUuGbkH71P z!^nyVf@Rzdnr>zzkOnIc2l6p`fceE~;ZIcYreZzN1~x)E&q3m#eVeiHXAW>9F+wvy ze`HU|Qf?Dbq7bkh59lmUKUrr-mlFKEm^R7VoDFA9%0uu|;Z5q%&=L%7DpA_*Bxfw# za9~aT6+^_8f6}HQgZT~M_kJ^s*`;v*ZF%e2^lI)NynMa2q%z!?V;e_%SzwBkleHceZUb5(bK)HVE%p|Uj<48%jo7(L+ zO*(-`s;WA&h4^HdMKYTmd@pk>J(X|_l!kG0`q*cR!-KXFAEkb7mKWmQmV-qLMoh0p zMT~Kt2NdGvc`xRLIUet5b5Z12d}t*HdrtR7g?4GPolV`eBy5NMc7g%G&V5#+l9Bx0 z>1jOmlZ`UGHu5|2KBZEIiTN%$un;}e!PeODY4xSq+rvp(&R-F~rH-^?uGD=jR>b3b zezXhU`*4rxTd1dnj5&J{DJ?FgEZgs!eabd>`twbbL-#(wLU^)9TCJI4t#PV-;<%m< z7b)xn|2&ZZe%zKpRYV^|Xcc7BmINoOsCeG>xP^TqL%3+369oRcu*2!;>O+W_gMMA}cr;ThlG{^L-1Nob!&0jHvG%UWroe2i$V zz!}i%j+#HcfH~kvRQ}n{r*d{750d~|mR0Xh0{0U2nwhpC(L(T8_IdzUs=!(Q%zIB; zAqaLz0A&W8WMBHzZj6#X5TTTtYKx=*BsB>0`|y@0ZAb73!#%u}h)4g>{3w^9iW)AZ ziFYI8Ko%0+Y<7UX5#^nZ7ax)-S&Ctg3CEWmclmY=xV?X<59_inNXRx-88~RHc3odF zHRFG|k_P9G&MRT&(S0lPoLCT)ywit$>=Xe<7+R46^z+&zwLEAqSv7Qr{Ks^palfI} zs@1!(B)`5H=n6Cy^HcgD8tH2ih17z}a79wZxd&VW!}ZWU>k#v6;A1J0;~F3}Tf^Sh zLFDIt3JH5OEi7DhV#Ugl_U+=&)0iPkNFT*a)q@_7pu78z+&q6}$YwUX(*+nBBBbMl z>qA|1+hGl^Qua-4b#2if@)uQPY<-@Sz6oZ=h9@yoy9tqARdrQzOr%v?@@r@}tR5Yn z-iM-J>LwJNJi&5Z0~=bw*T9hDH4xK}Zn*|jbm0&zZBrM zYX9W_Fk5H$RXAil9>NH9hkV@=F`>dolUg&it5rW3&L&x2=;>QN>!bLBP=3QMm`it; zo-nu4LRIt-hAkN0=~ZJ%E-QbrTp~2mwG&j=laA^^w{b;V#arE6c)N}R*MezW16>WM ziGAa10RP8T1H`4j?uRfY5J|Jq^~%j-^9hpSwL3iDaROGrD#%{U~vdO_Z=2X& zyP#h(?{uIe+G|_yz6lvm!aPDI2lt)(%%N8O<%1be1b@iVH6VCiU>Y`Qg)qGaQVKZo1F|irFgHJqL^RRtHQ>hRPI9C`MZZleR40SC&b@zzgwIY8{ZGK=T6i3nQrxM1!{N= zeD&w}I@AXa@5#!NYhZ`_={jkZzqfIDdC*Qh^sH#bO4);x(&jH6B|k-^&&gdIbo|r zu-r(JAy2J3XPTKD;PEAl@NQAgQ$|ZCU=Y|*qTC4F(1-PYb2gKO_?u65&dDo*R0xwI1(iwQ(k^1u5rE z;x^&M>!}7xEFbVz^Ys)05#toXci%gafXs;&7NQKx0G3gE4}{Ee^rPX~v_Rf^nw;OK zC4*p_RN=F>cH-+2f#R5u-Cxu&3Q&m|`6mB>HBkRXxS&1330@dXCh0#Y6%M6TRdN8d;Hpp^gAVWYCEv(Z7(@Zv<7c8;Ie< zk&0oH^VVJJzA*GqmO142bA1q$u0&E0q%@G>tyx4i#>v>t#)q>ktl;#4O$A`(=cp#CE&G(0 zwcQ~bFkDDr`4$qCj_0OkYVN%lF#)Lt@D49w(wZcj)p$7ofbY;DgmW~;3Ixa-Ja2kT zRSod%%EB{}L!D}HKM&kZCZ3Q0-VQqH+5YiX572p)uFQxxM+-zZ(nM%d;qkl)UWZ)+ z#1p1~3>W8#1Wp_G)bggxfWR9yc|AN`;CMCzxZ%(NcrQkZt^cyZrIZI20CQkjZI4VeO#^-~${kEdJ74QSF=!|5y%e8;$+ zvu;a>Z8F}@QT5zYqOv)*VfFoxYS-dBedBbw-@SgHF62}gdG0^@lB3+sYXSvX^GJzd~}yOY$ZgO;0H_zVXEbh z?=>Lvj6D2PKvEn&F4ar{J!HLtg9LjcY$~UqS8D;IUyf0kzBA1w$DZ=R+?hd|Kdfp; z!uk%^NSvV$?8}pcAY89qVj>vFK)81SEHm5<=d3mzD|ij`*oIMu@FIueed<189wSxL z*I9->BmLtrlIxc%#!84PWVn4Y3PmeWP5s7}m}B(!JZJnN)R6qcEHTTdad(W{T3S`iqk_##`8hFBo9mGBT8^b+CPI3D z^coS(|e;+%Yt#Y&{9W7qj*X2v(%c7Lo^?oW0D398K9Pv@H6Xp_Dp7 zdPH^5fC~d&ordGm17)yQIdt&lJ&e8;-b8W81LRSX^>oaa&<_C|ebRELzf!Grezdlm zsLpe7v%0wsG2x{a5L}f*)*BafV;Zi3g*p?oaVffP5KDlrs~Wi~4J<~>of!mIK8oLA zAD_7$!~Dvh{u)qdB#)2(Zp=DK^jmh_VU2c8geGcQrZy*x2|I(OMmY2zEgBmmg$J~C zww2HhP#eWMyQ-a02b=Y`xjM-0yhiCHUuY_0v;Scn#Me9kpFIC{$hptO~K^pg3HTVy_F8LAiqIBNu zx2{bI%RRZ(Yd}vu(NlUMr1zGFNABD7dbbJ5{p>JBDEiJ1m}SV*tHLm0jB6KiFG;W$ zOIM)W*!9D`#^&m0tZx0ZYt}Q9TLcT8D(1G4s!EEd&HADCy<#{$OnZwhY!7vE293lb zRfF>2M))=u5NUeGT+q? zzmgZ>zZ6~cj7IEX$z%~$@S{Ww)5ngK{s{`FYk>Ob#dHV@oZ}{%!vIzfp7{5`=Ey8uzZ-n)?sj+|5gzIeF8qzFd39Fvj z=h-iz(z?FqyV{F;S@({G9Pnpz$~A>V8^1c0_`S>RhsC$0sZDQ2kMsLsx=$zQK%uKe z^YLQDYcX_q{rNOVf@>iDbb@jPqcAP(Lq!$2)w~ZEpcn*!NB@bOTRY;0Lr;&z!f@DkpQ}dxdIjTp-Oyg_3~}4+SP#O`fRI zy-O@3)WB`^roe-)xK;rZcmSQjhCtLa+3zfCY7g6 zQ*z6vaPj1KdzMs)U@WQNx^TiZz;H5wtMv)AoqF;G=MJ{pr|yQBZMdw^?tNDJu$4Bo zoyeVgGm3WDLsmeq+-UHA|v({)NkH~j!-f3DCKBdmdcw=hv zZ0SMUcDi%))2F?dPQ5LcMY{C8xI15vwT`r$GwdR+u@4fdsR+{p0^sO9o4!s>5DkTws z8H{kVc%UJ#g>94>4YZ}>_2BffmQj-|QHGaK=IyA8WE6i8GP!(|XXtvzKa{yOd%@M& zpqB95Y|g6tOp?hkhmlUtJ7v!bKOhAajr1QP6UN&EogUabPQu6lEnfs^`jz$#e*N;p z%_qZ3`clnkre@+WE@C}3QVUmQoBO%eEuN|n=Bp}6UA%zRHT}tU6(m>x8r8iJr~_hX zfY~9%ratQ$cr+`N#clk#v+2$!6QP=W2M!vEub;jDkQ(_*x40n21U;3JY0?@8@n5ZT z)?uwMs!PwwxIK5cCG$Z18JLvDhi9)q0> zBSGFqpACNA(MNl^lcBA8vFy3eIvS!+X0tAQ;_<(!{E{cgQRNUXGf$64t)T-MdY?h< zgwYlxwdT?hx4UAs+FM*SZa2dYi(8Us`h4*tOc^xn^ftm572S;S&!45lE*#=)EcT9j z)G||2C)&=u5ArN0z9*UfYpZcwKP@4os@Zx+6MI@6jCud=@SsN)673uiHn{c4`0?xG z9V>--Xpc6Q+62QCHI#M@^g1ZMt8!UGB%|F%7Fh$sC1HE92Vi$|?Im7mO7v$W!Fy#*#e8-&c* zTgq~ADTZ-H)qUq-|0K{!TI|%bsNGXz?8^`;QJ<3#%rD8{bf5kUZSl;L%!LC-%J1oE zGCMlU#yhK>Mu<5Kz4-cC+$of*D;r6rgD6dCAoi)o8#GOcl4Q}PaA7bQwuzT<|M`k< zbv@djG!{RGlUnX*kpoMrg4jlRpuaFgxECTz<1yxURVzb_(Q19hIj7?(wmJl3@WrA) z(KJf&|rq* zkZs||ee)SVU&Pj{@QXm(YnfBeRp#xtj~^lG9W|@fHTBb8@gDvW&~98d(2)C*FpGpz zYYZY;?fCET?00Q|KAH<>C3Ts-_+P$PipSsY9gA_GJS{ECp*g1uSvDD{|ULXW3LIWibH2hY0=)I^CebSyns5ws1Gc~b{5 z(Z9kqcnwgU*!`%%a8;od|EyX5;o*@H`uKHGyPy5}{d}>W2coXc+apCi&klU~@6tUg zf;jNN=kCsAuv*L}i)GjdZ@Kc?PZjmUtejk-sr!$mkdtS$mF#fc$8&?wy4Zs?$idS~ z5$u?v2-50xyA|T7A1jLVZA=QpSELIL=3ugD$g5)sNbE~s;1Fp^OzosDL^v1hBf)9O z$ejj#hDysG>|{eWCT&nf%~4w|E-wc>IQ+iggjiDg)93StoUAd>^igzUo2oQUy?w{B zZgdNZ4gS2`TPJ!3X0WytjQ?@j;Qe;Rmb=y}%t*{+S2uGF`lki;SspaLh(U*|A{Ta+ zoeHgwSnBB%oD~xO2$T0O-^tO*m|CnG;l4-^SRzvXhHGw{p9%Iv9WRDf_X!$QatUzJ zn}{K!NqPO!0GA$)g|7KIV@sx%y5#QAEaSG|%WM`d4NDQ!{^g@Dkt5CCmXvr=buWjA z;)h^`SDD1JBW=}O2DCqmR`oO08(Rgtc#FT#Ztx9sSe;8>a9H4_giL3Ok?iH1$M#tv zjD@3Cd67cz@V(IAM*El#3nkw34*GX8b~GXy%l*th4p4qVzE0jYsQCHHTE@mo$2Qz0 zN_qn`!I^RE-HTFa5rq=TZhiXaWQ{3IR=xzCk|`^nvd5Y5L?w|F|uk?t6}?G52q zl2=xtO)00sBxr9vS3whGP40%-XcxJYF<%e2Mg#F=&CR!haxqV3Mt(Y!Ull^vi@Qj> z?7LWZ@>!9DB^4`sL8z!}fP^y1sc0wN*=x^(DT_9F>^Zr8X2Kz95ewb&n+o-lYhcFm zHY%g6sRO1?&2<{Z83CTp<%IpX z1`v9#vJjgCJX9Xq0?u+^+-*fAZ%_$0lZZ>>xbb_;|0*}MQHn{JOub2OShS#SGxJT< zK+;CY=Dl7uj`-du(Ij4rUJ3dkBh9-%I)3IAt;$taYbFJGXWzBWRh(Yv*~F8erlXRd zq4)p1o8@6{_APyB`SvL7>!mCeu3Hgo%J{4^m#6|$g!ZFxNG3~g;>A=Vr3y|RES#1# z@mN)9FGqV`hL(PDL4Fc9Vn}y9S>tGcm-`f+L5)*LAZx!XI0RY$(4R8TyQ#O0ePrQs z4XE4KK3^pn>Ih`O&{9fdqiE6J@4OkH3E+B0x#9nk<3%nn8*+~BM+B%9lwj|i=QSUl zId^SxBoCxs z_!kF6&i|=fun-;q=fd2VdW|5TU{`JQp2hdKEJTT`!<2>Ot&0pGb0*(7zq>;8J%i-OHB8NW05_ zbNqR0CVJG9Z1!&BDwc}$MPo}YFh5;cG zq3?yJRzMBgZ1m3`Qo-3SJC1o9cmCQd+8Xf`*}h(e+S9T>cw#X zEmfulyh|SoYUVwi5JHG86_N5LbTJ*Slj8U0#hSME!%IO`V?0Iu_wmaX(jSMmX!2a$ zjzlIz0$qMq3QiQ;R27V3_L=-t62H$W$Skti-Mifh-SYk;yuQ}$&WxR0y{k>uy-GLN z{;X1wq!G4X+~U|HvvN!Ck<0FPCAP7!xUf+O!ews2Do)V7EJP0}lz`aQ4H2I=ji_{f zyZiLjOU@JpX%gEZ-_r{+cnXG9`cg4$D4)iiIBdd-V$QO${#!pP%can9=}U_ETg-WG zV%pRP_oL$+v@{A-8Y9tI7_{ES+-zX;4uU>aO6d!tn5QSh?;6m&J10A2u8DI}%WhJ2 zBz_I}7nk+5pJuO2Pu0HLS0hW^f*#RX1fpV6FE20!!&f?pSPZoqN)RkF$Psl>+s_{& zTekM7ZT1>~6E={=4ewHq#4GbEFnsenNMGBviZNacD@RQCHOXm&?-(=N)i?mgfFkFj)R)ZRd+=p)_g6pJd>R(FGaQ5B+kSa(}!Zxf?oCyQ_uL*+xA> zyA)i|`E*@z$9F!A|5LFLD)&?ne#gMTJv6gTux>vl2mONGiuIsN?m!J%O)eG$db7u=WKf0F$~|mOxQ8!;^p?W{ z3JG|*_LTLL<>IBb%K7Hk zp!9g7!64s4K+(9=)UdnAjVW}8WovjZ9-#UzQ!==Pm&`Akc`_yp;H-wzBnB}*2!ee* zq@A9jJONrZn_%I~`68mT(M(M8)9oFj=t*3HajFu&m)o)!zHu2;u^Ui`CxdEA|+yj$jS0{(dMPk#p|gPhSP54mVNrdmFy%P z>3O8NKqOlTlpQQ4+MdBv1DHw@NI#SnRh)UDOkxL_zpgY=8!ny35O+1b#nUE>+V7-{cJ;5T!BwO$H1K0Z++@Lr5 zc3OWVgxO38uR5~~N<~damX5b)r@iICt6L#oRPT*`m3Sx@yRpXhb8-iUu8~@A=7cw{^CpDwpKRmTjDH zjol;PmtvpID}nAv99FgccORZDgUf>FHQ@4IN6WE5uO879>G~nB_ULxJnaf}*&OTG& ztJF2?3*)7!JQ4Pp0+1<_L_gQ|2`;@bLf}jZ`rmpWotz;Y=z+?4M=Qp5hH7*gg;noG zKU@8g%)n*i*kEWJ*e)%IO6@8+nDQ5B10`)HG>=T zEZ`0`{T~-*XhInI&Z16gdEZN94?GY%QEo<-U~fkl&_0Fo&A_d49fy6xQDnPRf%ZJ= zWWs>X?nttbJ80#N_pYa1uT52ABkQfGf4_M)hN>l#dLLRk_FJfT|bJ=$#Sa9EcK;~V`Dklvevtw_69``TyELVjt~)%|uk#l8B1Td&v- zennQON%mtW*Erg5xg$1F(&#l`#G!Bm2YKuu*^1>K!AgmGt^2{bS|_pk?a6!@N^bKc zU%Lxd%k5>y!pfb4>}qPhl&q9Az#hRLQ4g|mB`4yr-rO@VH!Fcjczsk)`C($qQ6_({M&`8~(= z5L|+|xi9fbvffqXph;6tKmiphK^|6c;KEp+_DMby)lpYRJq}c9LsuV7wWz- zW}xw?B3l);8sIv+d-u^Df#SErtAy)N?K9+QnIwhcnn_*O;_fO-LSRN!s%8XCDJaIU zhr<-Wq$0ADrZqobRbmxF&4pv~+*x?dkeZ${}@xRo|7| z{rWXYxane)v(4Sl18ieGp`RYsAKZa@`5eftsZGNh8)8^o0vKL;^hZ!jvSq3Pp#p-s zR8FWM^F|MoXAfP(#Yf0JDu!;^WZf-%o)&a5Ni<@G&>r?#y9w=4hPuDkk*O{TH5_1P zPb%qF3=!Nwzhh9CJ4?HXMb^fhx|1(~{!4_tT5Qch>IavWFUhuwsmDEvZp%Yj2sjPX2b5Dsi@r#Bl|1-LX9atAH#DPU4L=@~Dk*nHvK`7}! zZGiZIqQcZ^){(;GyV7H-8tdj*qSp1Z+68U1hbj81>m;H*10gSvby$)jxYk zca+G^i`E5AQHRi46URnc@eIGDzDOyrwEdRRrBlOu{~T!& zAAh6NfX2;y_KZ2RfZO4ot4rCpxsF6p0iBGDr%Pr|7Ykv$V{2bxV`{tB-&)mAyaIPM zN4pr%YN-6FlVNLQRwcPh@TY(@mKR$)U%wS?)#vaGwA;Q(%%y#PpMw{HfV=Eaf7U9LuiB~n;ZBc&b4uA&NqeT50})YzyDUi%UCAbw3?g` zlh{~nUa)6Mexd2hQ(h7G;A^Szf-Il6K|X1Tp{=O z6L6`KQ51$wtzgVR_zz9m=~$8mq}MMZ4TwuYf-p;DRdwmJ8{G(H0+(;yNDj&kWY}t_ z;;WrYc?D@o^QYCZdAvmd4oEMF^sak{=Rc-lTGX3%sv0Y8t87oZa=)MZgxDj-z>`>A zC^IBi*rXyadZEN*_hL1-JRt3Ge0-u$PW#|j>RqEOzX=CdT8LA=YER+_S2!1u+eTX~ zS`CrjUr=|#TzlZ;6kHLoq>eU2+6?$z483w_N;V7SeUPh{B{+JZpuXIc`sFz1ak=B1 z)2$namj+a59^rxe_laGtuc{RJRb1Tb2S}Za$XFk-CRH%t5v9g|hOFmtFic=p)&{yT z3=2@CPd9cm9-W7? zw3mH5Yn_$5e`nSW)nxb$%M-AqfzG~k!-OFzqoD2;zjL?T8> zF1|Km(+t+hdDH}JSGKloa`d%eUe};DCk(#D)9<=uC#r5^yKRV)K))zLx53Ag5Qn4i z(%3Nm0N!3jp_wo)vNf_D!xhEa3Jw2z{S?)^Q{vCmxW&ewalVSptcF|D=~V{Vonfet z;H3%G+L=cOX2=KFRdcuaZOzYFQWurC=%#ic2+fp)EU7OQ*tr7wd$S^R)Cxj<&JmMT z*%6kF!f{{a(3%@pGa%Th12jpBO*lG?Qri=vVw+10P~<-E#v1{${%Eev#i zeq(%B%B1giR*kp<$q6>I8tw;Mn1D{uy~^ua{|LHcsRlWi1)qiRBRXndhd@HBHCR|6 zq*iNYkFuthX^BQPA|;!>aBIS>2?8e)6&3m}A3=MR6!|gkXu(elA-agtUia|Dg4oi| zhPXqYHCj+>Y1o&0x3KjWIw`db!WTQa|`GPs@_!C|=0mtYTy5rM)$@_u>U!?uw_?-n-QKv@H!Kg`rG9XlNc+@+YxqBUfD1Qrk zAN0g7R=djJ(~eGk#b^#0#n|{&@rJMdy(i~^>0T{Y_6Dx{PST+p!3+1!+uJxO!lti* z`qflqI*J;hSO#LnFPkeyi z?K$yr^LdM&@|a&ExU4>g#v6H>t1%o{Ayv8C{G5JBR1(mLdwgFP{;um4G+hJgPjA;~ zYcVMh)Ms>w^n$qt(Yq28>9oG^k<4@TQ@RtT0(wM@Q&f4DE)(&_uZm9}D|$6_N=PfS z({UJup1pwQdSuJn9v$7elW{BIRmN3%Ym-Swx`jhn$3%)7M#~j`guc@?v|`=&xP9uS zWXj@WtHlM2#fIvEc7r{+C$GOg=$6oJ$jIQN<7H&5ZSGos-}*|51EB|95S=j5uU^Bzc2 z=ij(mK}C3Cf^7GuhsgJGWn*vuMgj8*!(Ppat9RaJyNw+`&W}okJ4*QA{NKcUEVK5g zj-bJpYZ1q1efVe>2!`6bPXwb2dh+H}5S<0cK9j7Wflh`N`ZEs7BV(hkOR0*(hs1@d zeu-byo{A@it8@K;*nkHg^Y5bHT^e9i(Y_xsPZ4Ja3hTcWnO4lzHb)h3XO|2n4_DoV zDA-*ed?yyd{1BE}I`CbBO1mf({)pQd8BJAmc7tl=Re_RL8f}z|$ZpP&Kz)QqggZ zB$X?N zhOV#02ZcpcFBRHlYjwqHcR1bqqzY8Uf4#+tdLBIIuz9k#YXkm*aC5UXRqgO#gyHf>Df;!PzkuQ!F*%i}9wm%KI5)G>&6p+4YENWLe9 zEvu#DUWaMaAAQfM)6Bo6!mDzH#9CVYIxi3in_g?0=vjx}m|sti%-|C)1{PjE=_6Islx#ZQKv<~UfRlY!!FLSmE0QC+Q#7nQj1sL2 zy?lVtL-I6K&O3!M7p(tE*hr#OaM$UG*f4JJaiPs$y#MTlklP)f^B2qJ=@YxrM?+Q= zdr%t5i6}Yu*SrvFpEc4XyUk!HsITiSDdh4yRN%{yWxI!NM~TK6O&Dqicr5CB(G_m#|5NWS`RJuTQmDOG&G!|eJ&-%n$ z&Lh#zsMsL-QKo6l`=uuciJ6Z@+C0S_#V20nwjZ68;f~PJER@-h-JbN?^zgYMD&Xp? zB1_mr7ee*GYCSftBuWe2SNNNGE~*C0u1#B8wQJh&J)57^(@Y@yy_+RmR(`mThX*1} z*tWL7u|EC*QG_L>x|Bg%4d|&!$I0Kkl)TD$!NWSgG8F8DAD;+m{;EZ<_LO(FD!e9N-LSh#BREOQ-x4q33C;Tn@3<>l9qD1rDL$vmWP2`uaK8*yRFh6a0&Ujq&t zi`m_42yg;fq(NnX8z2K$nwx#>yhw-s`?U;?x}G)?)$mY(UyCcQHlIa{LQ6Isw_f+Z zv$IKbkUt08etu6?qHa(s;S>tI``G2Kx< z|KH(=h0!mf3;H09?#Z|7+TJRBbG{H;86g^toJ{qofb8W|P-=`jul0ap&P8$7*CE); zPCpkWfi4<^QpiK##O>N;0S4E=HGGEoEhYGl$H|8OCmThK`{gnxjK?Y_bKjo?Y} z94#;1W*0(l75Y3S$qL2gVs-IQG8i{$Y{>Z!c)-%2F02Q_9QiYS-PQ3q`Sul|^KSPw z&<-BJrN&H+CSL=7l~{&z@$)9M6;k~1^j)*QPT|kZvmv+q`BSr2GTl!CO#rtli)O0u zHn{uB_X<2$yJVBj$gZnodBIES-%=bmI|YV(HxFn`e4&H^VZE!r2HBt@LvmV`!*V$sqFXd^lcMzm*Z*EkBBl5*Atim7d!FG-#qpj^hz7_5KCs#k)=E0xB+=?A-p-!_x>T=VSmH7ETFG1C@Am( z9M3`~8t@dt^tljCqNQr@Ga+lHAC6wA>3NVRoc3`2rx6C!qiiohyF?y9in-qQZ+Jw#H%h)XrC zrRDA&at456HB5;t`n?#fKZG1qBv#j=uYoVZu?S$KBzK(J=ylj^>ZCWF6og5o2V@z-l?% zD9xQZ-){4v@yGU^ZqLG;1iUX2e0J`eQd*w2{%KRxIx+V9Gk;<7OV(WA;oiuTs-~Qj zwy?Z!dy8h3U%TENLF$_@tg&Uk6=|*T!^SM}rmzeFTDlM29m>oXl;%83Z3uGk+(Cm4 z_qc8@$_g|zvC`l zi4;z#jlkK2`Rus0SDn52_$F%QHQsDjX~9Cy3@`k)?xoNTM=(WF{JJ|Cy(y>z0wA8(j6y0nB|59T&x?t9gJInq!Inv_R-8Mn7dp zqC`Y|HJivyW2gZTXMwUiEv%AENX*F$F%@3HzzefOTW(Z4k1ZBtWll;(mzBmChnU6y z(&9)bzeI=C$ahR9#RfXwO8=wff;Q=xlAqSi?Y`l8PtO1#?iX|-@gZ7J$aqg(Rm}># zN%+e4Vp0S^i2nB`&M)m>{1# z?0*sVrr}Wj;oJBidyJj3j8I6(zMGIOSt9!~l?rJ{2pKaXWDi9VqwLwT%Qp5Up{!ZL z43T{%8h6dobANyT<9{5_@w|ARm%K2y&;6NeIj{3PFD1}1A~?oRy#A;r;S{TSy#C3@ zc8q&VI^gLEyxAM{-NtZ09+ttq{7SlR`EEWKx^hu17s9!={JcvKEua>h#JIkx4k`Tc zz(#G)qNACa`2jZ|Y1rM>aMF@}I+a)XY={gD){wNalJLojNWo zHH(rL`0p*Ru2ZhHRuHoEY$B74y5Zx|5C+4e{h*_b81()9F(uB5kwTVVwg&RwAisC4 zPoz+;^@x#jA!=FMTvD5JOp`fpu#b&Df*N6yfUVgVCt*9*F7`GG0{~ zDpN0pu;K)ww#^_+dt(rhMPRamY0IMM;ikCnknKH)&LY-LfaUL$<<5{B>+%4^0Q+R( zimC`{;WgF8uwyETdoY@jP3Lzp=S0c*QdK`D)bdk1$k0h6g9?+Ap(W=pji^U&=7NfB zu>$B7e^hL>65Q>7J|Du}`S{v7k+rE@ZLT+yv~AEb1d5hdX(7FhhERfyQ%B!~7Bbtj zPG;qQ{9Pn&P*MDQts0_pDX>}r&Q$QC=eW+CbmQ;gCFrpC&^=@D0SXIueXkE%LpVCa zi=u6-CGWS`nChn{Y|Am^b%qa@oLZYqEvxY}#823Ps{fX<;47-a8Do;qTgKAqO^8YL zx{!gD^=BC2YnQmbHo9uCx9~n2QruM-0EsuXDP$N&2ZpG16#?^KX2sq&s+h(R}X}NMU%#Q;3a{E!H-vmm4xT^^%+7tW=g*beLIuAzusFS)%0iB z)ieH3pxx-(x4&Ae*g*sr{VSZk5zNRY4#{rBp``n_rb8CL%{;f|Oo?%fz9aqC2~u%_ zhfU*G7?pR^tSO5iyqEO=iC6nhMb9BO9!9_Rc30GYuk}9m!W&F;)I9{C^~elMXPY3J zM5+naP4n;tM{8JhvwnfF;xC z*){esj-7F3X`jaZW+oRz-Y!hYrF0RUpBsq>b|1uG8!@~26vuFHlN%GN>n zK(Hoh6rX)fNap|s{auSE-%g)t(mc)$UHbXdNagmSoeF1zSj?r`Jn9|cT^dMtbRhfA z;O@{A`u{d3f+X4VoPnpp$EVz8Ya4|M!tX{#>FOveNw61V&PTY9*z=f0HzSY`bPDw` z`0v(I6#;gVIfD428NoxnSpjH@?CU#fJ=H%ORkIL&MTcj&x_kmDDkD><+;9PpzEv3P z+@*@?xlU)JauL(}YmRVBFO>leGv{rY;-e#3bwqu9u*u}XTe zJ&C#jD1v-1XxvQbI)|R|lB2!pFbO4TxspP=p6eSFdDbQ$EzVjhYKWdvzJD%v zFQ4IQ4`$*LR~yWJd_8wFt*KHGKIqgu`?oW5qSVvt z^0}C$2)q38`l@>jmp-wbWP`v8Vf6`A0ljW)(@IK*13pS(Xs|$llpcF4e)BrXmbWMH*z0VS;-1x43DMyd%6gt;5jo*T+wdZodUIGfAI zLsTkmpezjl{Yn1!a;BrewF#;a6W)Axiu67(9xW-MHm8qn)9Ctm=WTdkUyj=tz4FB5m1MGS zbw=__ihf+;Lb>Mt%~qc$)0kB}X1?|sQO5AhR1I1>JkO}<*%(1#PrB#y%V|TlYYN6I zi&yJzjl>qAQnMABqR)RC11rS3oIbtUcm&aN>b6 z-k^+qntCd-JQ)jJ(afpd0yQL>uPc8#-&4KOQQzvAE!;Ca+{gWNb?p~*aT~#c@dNc| z2trsAUEO~LLS-f#%*Xc81tw8ZEIDOf^X}WvpVgF2?w3p>ed4Fb1d??>q3SoJo;*9r zI)@;dPe6Xc_`2ZqFIYj84vCZCJdlSFT=HE|U)GsBiqg9}sqd~4*HO8#6*O-Ic`a$_ z^u0_a?qQ*{w!JUo3RMU}t|gi8{#eC5{mcDiO%ERDh8cj1x&uC*B-)aSB`of~+o`h| zUp}9_X04o4clO17+t+CqPX!smK%_(r&4BU({0Qk!|3So2-V_t+1=6|tTuf>RPGq~y zlj8F23OFX_w>G4UtV)ld_yh7r6CmT1$XY$h~+H3WH5D2EX4+wh)ERv&0nkB&{ zD0AF&NcN0FUyz82%XiCrf-}|B-*;Zli%8C=sMw)7Ny~{8jqosX4Im#yVF`a~MDlkd zjlV)!76aV$Uz^75#ElKDgG%>(&1PcpZHc5mviGtXo--YZf)`Q{gAeS6CH_5rdZ!+U zTN8KasWT5ERG`k_74{xY^^sZd^)E8*0k4q446XGhyGpA~}jP z+}l|DDq-JpRs(lMNeV8LbV)ybz^Q~*FE$M>Jk+=2iweg7gGlrhecx#T+3&LPN=H%VSDTx8h5hiY zrqT~ybExh+_aSdNM(-ivf$8zp6c*TmlmD12T1Y8VqbTHp$amVS(H~4?)c$u|it`{{ zj8ix=HZ({R?#m1hbR)XdVJXf=nTrnJBQ$p1;a~Ad0%3<@ zaWd~>TnN!{uYV9fC?J>K7dS5qvi=j4c02smprSy7Ez;SVSR0Gd3$69f`c}cPzdZZh zGKu-V@~a<4_v1!bPvwA|ci59=Y`x$=$l$!bZ7A$Xqi29J*N(LzRs+OO6MhSSOq#E* z-8*+Zd)8vTK$T!BdM)->F+UNp$DM;~NT&+P_7|v;a6@BZe{K9@@s>E$tz4Uz zp{3hgciRJwUMqzvSgy6a&=Ec(FzBIabC4xrO_~5@JtSu;KZYDj8q6RBKEv@I!IQAU zD2a1mhZd5|r8+fG?O*Y&=PW(n&+p`UWQmi@=W4z`rF=`jELgXWd!PVVt@J9V;Uv^; z6SAFFc&G+d;a!siN^cnY95ITXS_TYqh8%*}c)^B-Fh?Ew_}ve~n;9v;aE&;sP#@m5 z8=6>fj@a(E!l&;+Kf#L?-bV8i^OqY+!!PL`z!tPrl!l+#zJ3{2pR&%#CdNt?AmH=J zWZJVDZPzRUOEq6xifYl%mNPGSzwoe_qnsyjbq9Mb#8Emsc6DXJryC}AaO*2t5}SyY zBB{Fx{;10Iq#D&vU+H{9=G|2YQG)!^tl2BMNHOze+8*;KPZFJ}8UfhaMe6n0P}oT; z;@!g`BJgm?%C)iD{A${^Ed85RZI=(*Wh>Uu=LPRRP&L5YG6wJyMmo-hfnXgE^eM_G z*r#khe9)F{MK=CsR~!3}dFu)hYRMb*7%j-l)R~!Ag7;Ciq+1qcwD(^A7)TGXo)i16 zwm*y)Fcw)$MQPVY@h2N+ypvMD*9{T#?2F?cvM8*R8sE61$9!$IM9Y!go_$29rsdlS+{ce0W1jqnp};SUcG zGg2R%e=Yp-kGy_24f4lxFSY-oUB4I-J&bM9>ccFST_y>Y5vN=}&hF|fR48SYTWO@s z-c`st5&VjL^*#ROUB?^O1s{Cs7L@9;oyPb=Sus6^AWHB7D^7JKRf430rzU+kVSPn> zDbe}a!QQ%V2&j+_79W)6;u|KIM7OX(m;`;c>%!NgLlG9*f#hk(Uw`sDTDv}!A_pW~B9a>d2{^v+ zp@9cW9zW@)h&8dn)%T8xJuTy}Z?u^F7|fefwHWSL<&RCX8KrZy69esN*`Q%bAjQ02 zs6ZS*JW@Xp{D$xib8;Jx3++04(Zgjt@qDY=(6=-j_JyFsIjTT1x$z?aG7Ond5hr3a zCK9_OXn7s5P$J#a4VI_|JQ_cAoZUBEwnqxh{{5>X*=(9Bm7nFUr{ckVeXb9!OWH^d zrf^|;EJA!cTnPt!+inBnu>^75!$F>8&#?Z}Hz_CmD_-fp!hBu7RjTQdaXF9&5_Abx ziQvMl)_`>pS`HORT#30qH{`@L#BYVf!+AmBZRZE#aq`(z*VpQM#xDI~r>@_15^)w; z82cFfmb6Ldz!Do)p@T@S!th&__i)&dA3MGDzA$*s*@-QnW_j{Ej5V{5 zHFpBUO(O%O{~NhQ=#zYp4JFb7#lzoDU1?1i&b!L${84Jz0;G%LR27C#|8%GXfu*Ov0N3Vg2@sxH>3)a5p1y$HV3vz`A3 zm%chs{;#(u_UXDAZy@&ve(8yV7G2{7!fz;LS- z7%n;^_0}wku?^mjlK?7vXJ_ZSr0a>cv4OwAcEjvOt1;0s>TJgyspTXOGfoSKjrqn{ zFJ}sPwR$d15ezdZr-8-Qmel2)Zj1!0mbeO^<| z%PJz(6%G_p?wh_xgA$EK;W$(TX&W>D7|!LN-G}AFf#+i0Ax)@_g*P97?9ZHDp}(oksT9ojpO3R&hxsOq zeA%!>j}cx&DQ@m^pZ#?1fKkb^2oYtx4^&kev&qldfT;4?lus%4{^wG@o$ zD_3vXeb)@pdsgeXn!mSnp;!3V2;bYKZ*~v^HNw!|#3~{M$6zQ3to7pfsMl8W6}*6w zV!+L|;U{+}XTHfE@?;x#k^h6YyT`+&Iv&x(5NXMnP_! zh9^?bk^%o>vI+knACO(ruIAxblD1~-j}@8t>#AqumYo>%tR%9???`367?8Aq z+2gGZcPD3%V7sv-x2fxg9VYm6;raP{bO|7)((hEu%!S6CIG@)_cCSX=ak(6t-mIe0 za2vD)>eAOP1%Tg_WS$Tz8ilw-5=_y}ejUYdPGQvVxusY|!GqyXW)U0BBI|^XFgklD zVL}+?PK!Yc0JVc&JPn|eSLh0~Cp7Y%>%6E!q*U;Sp@4`uPeu!A{j%3BO_5?rH}y#e zmmq^G7MH{;9z4(%U}`r|yTo`~qTjABNZ^(1LYnhWIfy=w{QLw{U2%7CE6Ic@B@M){ z7^(BawKBA>P$6(;c5@;nOg=sm!|cyjeql7T*+JsH33t8`>zAb8^_r9<(gYxP7rHA* zssi)CpSIpTYG0gGm3Woih}RYUzC#p4Uf5k`tKDBL*!ZQ-#d5 z9a|xo%3GH#qdEGcA9dLU=NhI)5ER|!Y^U}D0&5RSTom`J34V85GoCqKHa*e6cL{Pb z)4!Tl5TJh(IuNCU;u5v38J9^P2~FMeX_33S^QZy6zVPSf=dQ5*r#F-FMjB+BCqDQ+ zXb)Yci}vE(WT2sn`{m94_yxZj4KAg{VML==Y<4L0?1y08j-jaSbDHP_RosDgipPiA z?76Oq+qnL}N)t7+U`&2;^Fz=;P2|F2^&%K%w<$Gw%ZqjZLm6OBXWJntz$K`crUR*_ zq+bhsf6b%S4Xi=82E7esXsu^2`VTAorty#nUkZF=tL-0TPlwn}=d3RB1sje^kdDrQ zLQYC+wh00qxd>MOt3y(1NcE?#pND=7-*Er&XI<|W?A7tH+A_jgV>)v*_(ICBjw;vh zBcnpHnU>?0<(0Xm)~6@QzshMXmNf-ebZZc@k-x*Hx69sqNRod}6M7y?w5?6Teh5s#)pwl%0wNVG*X6yd+%}?=9hf>m*Jm3RR5Pcf_;u&!zHWlY zNg8>cdRH6Tr$agCokSzuUEKU?WW3}4dmH@SklS;+vQ(aleq1Ab023E>8rEM8H6bB( zJIUr&mAGfDygiKE+0gTK1!sp8Brld53t0^J{2d}Y0iG0n0M;h~pZN#zwpPGH6VbA6 zmgo7Ssy(hMzIr3*`YhXbFWr=9H_2E9my!suDh3ZAPK6qBP?rHhCRu2Z;ElZA?S@UOUO3 zY(5<~(N_x#Pww5qeU7w>#HtS7{hc{^>C14`Jl9>os|KOvEMK#_GM`6aM|kg= zB6wjTraWgME%P5lc|dv@?rRH3ChTbt8OmOxPU;7i8exxCIF?e)KZAqzMb3wqgBkQ5 zdh}7|)=GnNU%UUSJ1rkuh=X}zbrx|FnnD?jRb|hOPVo5uS|Nky5nJ97fxq5T50tEm z^A9xdJdH9pecd?vIV>zOc0f8(`31aohqc#M&>h&so5yR6y8K95ah*g4zO%fd2yV5qsumF=dOL(m4cf#=f@VV%^&a3y%J`L`C|1N=ch;CTYgs;XCdF<<+B1Jo@m4R1 z*vz`btzTOv(09zbV>2W+$%mnZK#-g2-{1V+vE@pO`0L<;>MLgj{Yztgdy2Wk9w+IX zR1M!U*H40oD(+pelnfauhNP9I2ud5URzsMdt-hg5wk+*fJd9E?5&;!y3{>gZD~1P? zS?`FJ>PqegUy>=MHV33l-LbHmbTu?M1t(_b#m7jkdJEZRacE^+s;*5a>Hx*ZY}Gs8G-@QBV>UQRgvX7?%$o^yBmq2SsEV zZ?3gm2)0@*9Mb|Xh=5q3!I(Uy^!Y>cx4-vHNJH>A$cZ~fM@63twTuQ;ETe-{K$)X` z`0T}@ZDiX|ZLvU(;n5mt z#{ycnb4BO(+g9;%NKiq(JmS=F=^B%4n0j&0TnU6v<%Zm#@QxX`&ekQ#V|9q+S)0e? z<0Ac-$VY{1b0t!5vZgm@qlHh2O@jWjFWv-=oxpmeKZe+tubJHDG1#&dtE2gj1iXS@ zfjIOWjGZ|p^}Lx&{*-|=L@B&@F@31|Tx9To9QYS_VBWJY2Oo!kk5Tz+h|Zo*OkSJV zJD`YcAq1Nc3!8+MxTYv?`SEO^#+~{FWv%O)@lMkqY6*IEzfayGqn`}=2^VmMV|EX~a#|Aj9}a+*#>^^i26a}ePhB;9$ei@( zwNmo?a5uPoDWBKkI76CdqJ-&lA!7l2J)1-z2u1=*vfb8q*ZQVBas5$DHl%_Q|5Cu& zZnlHBVvB637@7DkUPpcB^;?%s>nUm2tBY5=z85}cAWP(xow~jJ4{|}eqHH@iXF>Yc zyFcErC$)KK6|lfIe@ zvbos(3xAGX1P<)sh%Shy2zJyp!j3%BPzH##1R^3d^T7!aFM#QBORFdZyfy^6)SYua z^xLUl=^puU@awPP)Bq&Po`F4mHkw6eJ?YQl+5uHiYjtyXEsRJPk940=&Ld=JC^g}Y z-fd}}F7p3S_~a$BsKqSPQbySBg{i%T3{fCs;|rJ)Bj_ymbkDB&wP6O3i2gSH!@inu zo6+vJr^}K=LT~xxSh?%7LnKFK#o?*uW-%o~ngG+D;=BP{@z7ZA4?|Yeed5I48Dc`; zvOe){g+v1$Bj{>g?lXL7UikyYPDkP%4c z?^WAQ7I#xD#bUp{Aezr&k^Wq9@7))N#4cWbVy``YUQ!&w6m3ws0SCD$MCZV#;J8vd zJg`T|YAxRY6|CO8%uf=kz`HL(q2cu|Gx=FI(`Qr6gI*dt#tRN=HUxN3z;1=0HvvlS zyYyxn7vr0zLvY;N!jBpXncrsX`H749q{Q1r92IhB*%8UuDlfN*NN>L|jjV{~Nh^+F z@&RrAf@;~4BlX8aZH+%5(}(2%y@=q&_+g{Fq=^%QFrEnl0tQ5d;CL6dgNMXyG4Hm? zmO=L}nc5BNc(=zr6BB??e!=T}_6|u_(FH;B|L^r(J?whMY-W%txj++ z`R5LO<89X1{a~yjp!ci&P3mVhy9W;#j+7)j8!{*CT3T>*Z;;vSYp&iwKR@-cl4@|5 zvuCYD-Zi@1OcP+skmRT<7vXrX4Pp4=JgkW$I$~m4K&Rco9MWSL?>$w(+jUmB&nY{| zckbbi+Riz%6hHqmnY*jYf#wo+HnHeOsF~nZC{-Y*2hk{-r0=l0cY1Ni@Fb9oALD8o zP3H=4>C1U6QJQ^qjL&mO?5B_~s#_R!znhN%w6cxzFZcVfBeW;*wNg5!O5`L@pvE2bZhZbGpO$Pel1Vme@+8ae>ZoA;$Zfr)6;f(<(;g5%eZB49Ze z-MxUcp&mmn9EpKA*4Zl@Z?)NyQtbZK>RFE;2WnCCxi8ZUYcvw&9ObXEpYIoZ_GtmK zqoKQI7QExP*d6v28XCV;zisqgPdv-r?s4&C6_QRONSBxif5K)|iT|(`|-@i9rO+JhJ-Qj$`)IxvIQL%?PTGEaM`|Ka&mQfWb zhv{G@wN1D+Of`ZWw{T?#V&+ekZ{8CW&whUPY&*R7^Mu zI^8*QYw4Y+f$KZXNy|1ZN15G+5aa$7+E?PJa~lcTr_8;AusIeE%k``}yfU=R!4Zk^%is;qVeEH6GE7PsVkFifC;&3m2L?Qyw`PCfl-}qlj zE?mI%RN5|tfjtxqea_EIiFmL(U*qbg+;6)J?2cC=BLdF4yB0PvOYEQ0>~=c4bRbnT z`Tde1?Pf+&tziZ-GWf*QqPw+0h2n{H5uc|8CG4j>D9+=!hJO&MAovJFxD+>^=&~<# z8LB?$gH?T}lSoTX)0Yn|#Pap<9lYF?xU;Bo`OLhknEdz>Q=0c67=3JOaojjn?~!H^m{cv6lpKzL@!; zwY$}%K01H;Q{1U4eYL=dl7Z3{z+u1(mgV{7^`zO&$oqAkMrWJ4PcrnWb`1=R<*v}c zRO9_0V*?!KEuU$g*0&B_3lsr%`n9|+fT!crUynAK# zmKr}`jNpK!=&@`s0+2o4T_L~{A15SsID%93q1cecJT7IEud*{uIR52SUD4!^XPTN) zh;NVEpJ$YkGF2J=1Gk-_Y}=T+^uZO;a8_GVM}Zb#RdOXaC%#FU4PjCh{+RCWoc;CS z=#R9k(Bc#6&uSv6GPl0jUt*opElu{RVnfgQAs->v`As+a>mzJ*Ro78f2Id;d^Y5&( zv=dMKU_7OnIuQKEr$*IQ;x`GpyG03W**jZ`9$Fx`6Vk;1Xg`K`_W(VIA3@2H9~o+O zB%4^}C9GWh&h_U0SkQ^#E2Gg2(z$!+3z(kUhP+gH5-usE=>Opc%p*C>1JC*T!3e^% z-Ip2MJ?4!2djB_PrRzRYu|~Ebiu}uxuf4nf#tWG{Hrm6r48S2+1zG`G6g^LXgUiPg zH@!lL&(dtb!*>l-qg*?T_##BT8?8cpa$V6g(Cnn*5C0WE?UXa< z(AP_bDE?Wf>kRQA@yPNh%G<*4Yw?rP=2wh-*Uja+pBtZskPGL%-|#PFICES-+*jBL z1kNk(MG?~H5qQ?P5L|+j+oQK6Tf$sY6$*+MoW9Z9Bo~w2PVzp0Zni<*7 zbyN<5b%`x*Aq;Qcmj~jNa9R~wDjZan=lce_UnXhhm9)g`TL+1(kkuP`-hQxr=G&jF z=@70L|9zU__|b#1G+c?~_mmhTaxEW#r67G*8iYae#DOpsF?B$dB?JnX-?^o)P-Yxj zZNKJ{D>+f-YT~mY`cbeyT4Yq-amhk#*12Z_kT2^~PVo{V?X4q-+2P*qz|B&_1_I(F zNOozR)Ywz~ddu(W#&{@~%>D93FV#;rEDu5R+r;_;(WqOT^VP^79WiQRVO)H#y&Yy8 zCnUulswHiyce;&-MI{SB)dHVK3xB@PkUiuO%@P%dI*Z2wN_inC9@FS8b^T!t#{Od%2jG}NRKti9cPclde zso!1*%`Wq6g=d8_T1()+>^(3uuSP!x^9ydZA$x@bRF9v=4k^cIITu`L?QCoLFr_pY z8yw+Z?7`Vu)SB-2G|peDVq+k&8U35!Wu+9=f=BE{6j5)`+EKE1V9997@O#KGeCTo) zmb(eelA5ni_vaSd^(Ohn2b1&6pW@R0w^K1jjm0&ctgZ$-R)0l5;c#H>l|{ zTyMg%ZW&5-Hhr8{@6`Rf(SOjAOPD{K$@1Im#uaQ((jTU)VE!UeS*EGTn@AY-0vs?7 zMf%fu7nPrl;YA0C)-!){hI!7_IFCw>QE*xkUJt|em=kPC$8-+xZnm$&1`q;(CCC*( ztA7MAVh@47Ys7@uz|d;7-ZKP#hH{VI67QS5e1g`ym6_c)R++YAOj3Ak175lv7`Jqo zynxUhP4Gkg*8GIczCGvw`t)>lQ#v#}vzN=^Dy~IlAOflHFJu2!fA994n{Ooiefcxq zpamGZH>Gk}xD6t|v?uD5OKy*=U4onmdNY|ZvM9J$#a?zFT2XZF* z&QmcV$FQYOc{&FiK;?hCLN0{n3)^*0h9Xr%KQ?qWzJ6OU>!{AyT9x&T?tcfUeWz*obYTK|lX|Q#r3Q{~kbDG3iXab@!zWC(hu*9kh>|gam zfqn9>lf;j@6XX0~!jmdUAXB zzG+MHKUFFYF;DSWAljCx z;yR8ys%CySw&+vHYv83WMY+;CnxJ4JckvYNneFF{2A>kMGJCaTN>xVz8j>(y*cY`7z~=9B}282&--e2G!}lJ@k0{{cJ|OEeYN zTEq6u(`D(ybd_eHZZ&6g33wEdt<2+FW46XA&3wpqs#iPBSaXkWGYDWBkPw=t@)AVH zq}`@!k5Zj&U-X6Ei3}hg3r%uY$OJG+LyZ|~ofWHNs?ERQ?Ja%R{59xuooE_$LRPIP zt;p4BKyz{C%CRj~)h%i<0tPCfLE5F*gRb+$On{SINHohK3HKf+Nn%}ErV6utyk56| zc+Tdtllk?}ZH8W*S{G6Rl~;l637$tPV7563Z?Nr84&?H((4P;X__@)0S@oY5jNf&y z)b#$=458E!a033ppy|&9FZB!wS%bq{hdC3MTW*ILVM;wm{DXuiErul}*0c?FCTjaW zy~y`yt}HSfb%DEd8XmMB$pwj43!1AIR~m&rwtj5se|)e6GBrSCKSm1u6;U6%w6p@8 zTN_y5>ar!{;*#$@K#IE(9Qpep>kWhN4gA+XzKx71(BUo0RCRnA&yg|B`)zKkb{FLB zvg)FofGgQ;hyeqB8{l=Z`BN>D;8bY%T&dN&S9iO2y`8*g{)|PH6{BZXJ~&eaGX-J? zg(UbskQ}$;RKg3EeBHXeyDv{mhw_OXUCvLZXpCOiR2x+v!ha-meK$b=ra#gx%DVHq zwDqK|3@)=28;mHK*qp_^htBIQs!=#b8>LUxgg{8nUY6&ZGNRSIivfA4Nq0Cdf9rl%zEFRY_L|}U70vAcN2zW z>N%)2b*A4F;oiiSM1XTIYswYCfviWv>N}Fu2Ip|Mh$~O;pC+%@oNOo%(v~_AJvk~Z z z)s^3iz0P6#oMunY`(BWJG*vTJU)_eMzRtEFm_I1K%^WHTD~|ZnN+{rVUQwR}Ad!<~ zn5IqeWHh%0+)eMsaqWcdd@}hR0OIn@NMMK5q%ThLb^!E;MOZ-u%?t%MU zjrTZ(GMZ|Zi2PLpeL>7ru+W0e^Pog~k7{q`o=;>>w8K z6gTx^g)Met~!PgG|qD6n9!Vh;7@1}YT0(5nmBJR({j|#cJ z#0YG>L5G6tw`5|+I3f&RKprGt(u+HB8?DX+Dh`q$#%9na(fY`wKkok}UIHR%g%xZh z0#8w`0Y{RmE#o~bccWG3U4MCXQAPVjcb8sV&Y9NKf^!^cg3q7i{CW97<*;Q}rxvu^ zXkMZ6>-8c|qqHa{u03ljyx$OBMtGVnxEf|l%fToti1E_hB<&y1w4Y!ivKQR7C2vK; zuPz=)*f5Lsh}44Z%j#1c(W%;s&e23M2nT;2(B(=`phm7L8Y^mf#CNOCd$M24J%u;F z8{QJ}u+r@FHpgKa?-Lu{PJq*_t(Rj|cESrcVNS!{^*I(hbi8ypCt8f^WtPQA%#%P z2_>~$dmV!TvZ59Rei*Gx&qhjt?26P zcWH;F8&Ya2X&?gS7<$rq!_0%_d?5_o^1szJ2w6*ldbjDok&6M|HvJm=y@!V-Mk^Xm>jy;H5YWkxa#5KEU#qn`A0#x($7<;sj9))TnsDUo%YQyMCtm`nyW)dB)*9>}aU8437)b?hS@( zOOvf^<`u#plD}zRK20*cn z%pn3)baEIffVgEP&U=LewlI@?48xW+<`|cec7i&OJCi@Wq!Y-*29zSs&a4h z#Crx6QA@j-W1+)L^DE#~^4q}`+bx6+h@xKUkR>%Ata8^9Y@e(MC{h^+w(NdkZ5bar zh_uoQKLV53Ez`K=5aobxsWYCcYOQZxc9UyQ? z;CCl`P}ho=dUI6t9~53cmtcIH3woVK8BgB6$vmu!rh-uhIxzhKz6(_g#N>fIpE)-oz&!UWrFb_n1BwF=! zoh3s1BDs_YmQ~wGWE{&{X!S(tqTkpfD?Ug|^R?6GV+0too{Y%mLUxQ-`ONR#ebiF7 zXTq>_cvL%b0LYu~!70{g&<4%HHX3)b5t_Cq8)InbzjG^NWOYww8F-}G6)$A*!|_&pY1s6M>w4_p2IIV;j!IUgQXBe@KIaOwzJ`FFjh0;prXp#k6pgXBFUV)it|T zFt=M@a@3q*&>jAu%YohX*DFz55pZT3HvfM~Wkn*aUH%|st}rnGPw(PQUM&Mf2R>1E zM=z1m`_o0axPSdUw2HNBX{>WsNKPz$dDomH;{uEEb(S-lX*O)E^OtXY`F{A#c^yX; z^d~>3m4Xg0XgDpO3NJx_nOi;=m`p#X%TgH>wpuO3;QP*9`*Zfs=FJndigEEgQFGZh ze>eB6|3SF?Dbn;w1Va;-)|v@utzx^TEWU*nCWYo7#2&S~@9FC0#hE_04upJG zoYs;b4g3Rv8;`Kk|A52i#s{|Il`nz0)fK*Q3Vwi|gI5;$9?pqqT|kDmM`&^hX|y5l?8vV37E);{sWP>B}Od^gZ^lcS|S{ zl7Q|B;nP~N>UOyqs>}VqbjR)wL^Rk#;DZmXqZo?j>9ZIH%xW2oLtm#Bb)^+t5+gh~ z`?!{zzd}7j))YRi_`Nfwen4=E{2@9~CPrE;y6~1b+o~{xQu)-4VM$I}np7y>srLq4 zr+YD~{Cn1t^QRS}>&l{*J1rh}19mFBwrArm@T6jhmVv(=KOb=0eO@|3A~f;kOZo(o zYv$|8s@4|0C#P!C>Fm~&yJdTSC=jYP@LJ}pRrPl6y#@bIPn<`m_eNrN)3b$%xCWG1!MgU& zYq)P@C6*m1>zmvm`E#!33uNrJJiy86qFQb~<3egTkIBNI61bM;{ zAQ30*fP^(grnRu1z-P{xF3Y1q_-`S|U=!bzyC$LwyeG?Uzn!kX+0kOmgCAzyt`r*@ z5N+YTd|P$>ArzcVf6p3#u<7XN+kfPo$_-Fk{9PihYsWA$(J6aAc-j#z5S~5NvwUh= zBg*(1b9|1!;*ZY>SS9T*?JcYCti?Yp=W^6{o&;v;Hgq{iVro$MK6vfCEU+r&!Rx zbji^^qV=g_0O@^rFt}DC*DLW{BBscX$u_|~W~*kAVw4liD^vcVayXJs%&6Y4$eYM`EUn0+p8Wp2l34f6lg5X)$WwhWq4-zLYc@ z`;cv1qj^${4>|xN2H^G_NC$KdVWP4#&_Z%1e3(@qgL}0Rf8*k-^)<@oP0pk(EF&7N zf(4B2=RbCvE*M_TobQIvl%&r+ubp^9LcGKhAK32sQ$Qwhk3kcLF}wwj85H$b=AxR; z#mDHwqwvwbI@=Pze?beLK4CJhfDWQ6c_-85-Hcr{P}1qXusOT$M8j}bg5BTh9B ze}}C{3w~JH{p!y8D&L~Q?I@Suh1^{>L(B2T{k&E!KgPqi5U;^*B#0FZl}4BzgD!S( zBrXhQPLK2}Ujv&KtywUf%i#!{jx?sT9TS^G7Pr#S@y7ixyaq}#w zD{ZQ>9|{VPN_8^DsL5u_Ek1u!0hWsk`AZBIJJ~yCHc`wdQBp%cripcM1yC?7Ax_|1 z`kxt!QpJg7vFdwaUemVm_e1sK+dt-RYwztX5Y#9m%pU`6rd?jGW->vd*26`p@~)V$ zVZlEJ;1_?vmYhYBr3&o~VCqNdUv#UyM8K*s4y>wN5v;y<>8@@Iv&a%-zqh7Rl6MEmC-3J{t`e{Rg?9&gu^*I$#9o^H^R(rk-E|h9&*!U-Ovz zq1NYR{V}KSJ!*SC9k5n+3%6x0{@$QYWnCykFic2r3EXq@Um9eF`@^4L8T8K(dw&9% zbmb&3?k=ot%ap7W@%lByU|wLDq`PZ9;oIe*phHne`8k|5{UTK(j}X>l9oF;LtFsa~ z2MW(J2Pd4qtP36I>eWBba({lYDm8}HBt?P?)p-X@Du+NH2{Qoh!$EdwQST9t(>kEU zAK{-a~ymOzXGmhv{yFhWW za`E~#+S|q268{j_ktAPTd-}4&H#H+(=^0^guN3KEFBrbZL=;}z05xVQ3qyTSagS{; z*-1r1MwA{gKfkoL{5zT^rB$y{b)16?>GH^08vJTr_a^%)A3U&e&$LQLS}&umwp!Sb zyUu;SDN@hsX0R^XMyt!qJE7u&c?UdDqlIX=H0G8qpD&D`Ziwko4-9Ev}=Oxji)7H6JspCr52vOA{EmFBNx6Ci8(_re)dFiDh_tc_B0jzPhxb44&g{K+Yw5 zpr*V?f0fG%U;rS@C?xpBwM8y~wHzeH6vtZKb^9~vTn37JoG=}bvE<~PS{1~bOk=chH5ST^u@KB z*Q|@T=haYG(INny2~aBjJD_GGoTJ^Fa_GcJK*^-FPR&IwveVO39L$C0)#0w2Y|XYe z1-aCoE}rQMc8olc)n*wV527yH`P(X=W`K412;($M%iN=2Loiw%bJ`^vO&aeS`2*Zt zT-{C6PrtA3{;bgSx*XOO4L1gw!DFC8GaWTh#&(>=;*05WBkI&aU1PFfYDFyvOJjAo zh4NCCzWnI!*Jo^w4fe`{_nAMmKRa4k>H!@rAs|^$fhO(Pfr>?ob|;?98tTSj&FH)O@Cu_c@8h!z4GBAoi#uX*zgCmZ z?XXGWlWvK?XqAU=k^YGssge2Idj9xSM#vKn?`lgx3f2nd6RtEa$*Hu0GP$dIm+I0l zJY|`nmY=+KjZGoJIgw3h6?w#(D&&2YnFtzV5xnV z|8XP0CwNNjs#ovqvRWs_#bu5;`gl0%z-lFGh2*qC-~DP%tc9Ti!Ex69&;ERpnM!1c(l=!Mbjmqw zu?GgCydmYfA(; zE@eSLk1Eg(V0-!eb5VrU{dTUKt_Z65{0i5r*?976zNsJeZyXTfP%P(s%`_OX{bs|{ zq#-k1d&HW$qEO(k%_J*s4HKpaj9=Xd!1X;3$Rmo&F_FFyWw(hW3_!t9*u@X$@}k|r zX5RG^En800J$^RX*L=L@AX$0mV+A{FoOaHlYhNxY1`*l_qc_up+yE1*H1~stzR1I4 znVZLcW?h+m{=3R~w)o)d=|Jl$xvbl$rVr2nlPoMJ-hs6260e}yRsqB4(A$CicL^7b ztDEW@o?}BnFP8wc!7-m4Y@dGkrD$`sS(*3yI)d&aP4z1wc2cy_#`&+VuRUQ5V2dMN z&Yv#qzpG4bBZRgGa;MPT3Vzm&0@{>fS3n&8&O+|byN25i9M!79 zJY9$5#Qk{N(W0?sg>Pdmj>LNReTF}6RBQS;1_mZI9W#UcoF9i_fnHfaF%kGwvC7nD ziw|B&Q^!L6E$VAtis&{*Z^wRgzxBj)`oUKQ#)dXXwvEP*!l!PzksT=C#xSx_USEd@ za2dt)EI$|ryf>0`3tm7!{&jcy1ogs?vzphfhV-Bz+#>to#Zh%}WEZ(VOH(t;QupPp z8u%T>lX+7NXS zDM{b=8`^DB?Ppu{?{JgwpAK14**Z$LB;3i5z^gF)%u^@Vy(xXQ3F>h#PNCxV*8>*r zvh`nazNyqE%K2FHOaKlzKee)PNZ(XQ4siMsuq+D}<)PMd6px0nhYL6SmQ$u&uI}uL zSZuqs@4ay@%0Z}_j*`q7vi;*pY|h=uD9DHAu%V6epm8fV*rs)zw>i+$y

Wi2kB9vvK-Kn(WvvTD}Y_TktzT ziFUB2mCX%?crd!Qe|C}H>*4ehGh%#P z)Z}^?jbCD39imTltPAwiL`GgQpBK2XX^&|~)vI*k{%Hf>W*><4w5s}0MBu$DEWQ}& zn5(4Lf^;s+UMInjY(AGkPu7Jg?;tUfY3lJ6Nv3Byl@rqiDf9VCJOVGO*j3Z5E&iql zI_^9aH`sHGru&LoRBjtxz2`3AeS_`2lavCyssKKKoA#t&rdk;=KaeEr%A=vHy_Q&k z(2%8&qskoj$Goc5LtFyOdN`EQ!Me0eNeiO=j z#&?2qu}DRDzp`d8Yfu}XS^-0Y!V#9Y0AnK2{|Inq2bNUeKJLoR5EGHAa@?m$<`th+ z#CvZR363AWyldZI@?Llb-zm(SHV~i&U?!1OW;A}jj$@mD^;e6@8HB^$M43!Hjsx<& zJaziOH?e+k#8@M@=+g&bhf&zR!z5N10J9bVb z2$;Q2iJj)G0;~c{t=WZ8w((&m*eG7N9oI7=4)dY)NH&ij;*NQr`wy19y-$s>wmE`A zsbu;F28nO+^;UfyY58Jx@}^Szj=gC_Mpv+b7&J$AqQpS)AgG)K@{KXb!6lWbbGSGa zJzwRtBIi(#y9CR}V<+=EXW_N;rXJn{w(COAqL*GMVd`v{ow9xzt6Qj_Ep;+u$c)-J zqHpy6Ve;|e^Pdt%dnadmhq$HU7Eh&U_Xhqdi)51qHqoyyl$z0kA3);qR7TOO9`(P_ zJfy!H$kTrEiW}$DYGTBNsvBiLUi~|%)#F;8Iuk?ZV4<3BwCW$e)fk?swt`-H6!_4O zPg!Ra4VOS=L!*6LV{=MFg`Q{ols*)so)CBLp8J~rd}?!|;(2yuEq~SU=*I<`%2>1I zy)<7<&78+_ohEUs0+~$QMzG~uVIypWz8;&$b|9O(g8{&;@IzNN0R+kx<|H&gb8pH| zP*3zTSnEELll6*{>@qntD%X9bTIoZE^3NarR~{7bJE{Qj79A8&0RQ zDA#L-oBsR{On=9DJ6xYVq9ZH>?8B*UTr7^c%@;AN*e$_-SgdDBMbNI+PcSc@B=seJ z*b2gO)6P5Aq;CB!&U$(O!`P{>cTP_JdQ?z&=S;wtgFg=w%@1T+kJk}?UEtAZPexA~ zKH#rAYq($5)b{vcDzCfCJZ?QK=&&clzwTegNqFE!QhTNWk)MHxB3<%-x~KqnQD8J# zvRQJo)D651zi*yI^aj=V^JmQ~lNZ)*B+TzIHZealePkrFII+{cdyU=b*XtDTQw==7 z2Ky-m+R>o7&356FDu|(n=|?LPe@uP><5?>`g33f^3kp()V~?V!AX9Z*s$1 zx%Ih%u45Oyu}?9c%DRWMl@Mxqcy)LQ)%gu2;5t|Vr8*c$kb-k%vqLZ^`L4N{!~CnA z&ztY<9Fa;Ylf3OM&L16YpoSR@$v@VtavM1R@EL69DH74_#qfIyCFnZlgT!41m`R6NI+FyI6j(KlRaGa zE5xica&QyR_pU<0Kh(lqW#z}8a<%CZs1GgqW?k8SV9E zLNEQS@R>!cQ6f}$e{l93ZHHVL71S2DDAV)>*;$}08EV}T&IMxCx4OO4AXIq%N+ z(%CJS5|KygV+i=eRL;#yd6&Xs_gQzXujujlV}%4YFTBnRE{pm8uDSGP^`#PM|F8wQ znv&sOHA8FltPoRnd<8&1F`Xs9K;nbNy;iIgSvleGEp!0v;(<^OqHH{<)vn&{5jyc{ z+;-5@yUe=jWA=TxjSL;9T)K0`V;7Z=0MMd5ILgz*%vl2V7+QN$!|EhwM%hn}!*b70 zSW%&~WR>RknF~<_F;?V4iKBdQRw@~{5*Q=i)fk7`y<9Yp%`x~z)BH2Ng!Qz64h?z^;P8@I8m_IFecU?Dw+o}8?I!9pU* z^rg?oCQLPP^K$ERi6be!6mz1%_b%+@2~>rU@Md3QB@1{HvoRg52=M*?)fENfDgelQmH1U&7y`h znJbXy*!%PNq^bn4OK+(NfaqW!0#ZV3P56<)u{NB%8$(wFKA*)nVFImExPjuX)jlu^ zB@gnZ(a28px2Yy#n^1d~97`H+-4{|1O#2Kq-bo$b9DG81b&-Pm4nL zy6ECDu*db}i7Jqs8BGvQ34^|ns7saI za+u?^Q=f8hE@Z8zHAkCun71VxulrpeXLb@~e|NR_igFP-(h7zt4%O;TgQhw~+^Yrh zic8*=dkYX;HjY>NhTM}jT#}7#_bR%!loqLA#FplAsmDey;9?2Kj)Lx&pw_x{nGR>$ z(!V?jFk1Lw2^ODRAE z6@CVJw+tLB_Ji##HC~f>`^!)JzKFghf2}^8M5Wvr$KJDnl=|Y3KE4QmiqV1PHV_Oo z+SfOrcDr2qYw(7sbkVWp$v1)6e#<8nZ=-2Zr+>T&HenZjWD6e^uqw>+K5MjT{#_fP zTA7Cge7$wnAcq$uIquK|NJN5DcO_!uZ^Sr8Y^+&v$#UV2+{Xz=k93M_XpU}Z=CWzm zvmu^NMFDLX2!;}5w>_x3AdFUu^&|fdF zyOx@@dLn_!K|eA_(AC9Go3*#{ppKv~kuZ(;Ly1Xo3|ttpry4gljjvtFqlC_GXt<`G z8k;vC<}=_(o<6PiXOFzsyLk362LREzVpFjk25gv-ML81u$$l*G=($r+xSdOPtVhT10@h4PZrW%}#_02hl!8`m$y+0AphlVG zoR$imLi~$U-PA1k5B651RT8|>1~mV};_Ki-1FcFb>8OmF)T*~%Z_2F8B>EqNnmVV< zqj2_L0MD`D&W4Una zA||PiX{X-%-nboP{`=ZDUyi=Jg@c@WllDuf>o<$IIp|z-WI!hdhh!@wTX{fm1|+MX z7@)a!iqke0{*)$Ix~}=4aKqtiGRt;12lEH}RkfA)J?+4HpR5AzUHXv@uy$3{fvz1f z-?Q^Xny2`>>FVSw1wpC28)=G4%^BNAp3TKEG`}AgQ=@$!+K%Pq!L+e6@lC0ZgsAo? z-?wS#fuViD_RRhXr2A{2ABrOh4&d(+4wv;d{F4%Y8m|+c6)xLB$?u;6QJszsjNUjJ z?B~o>D2DW^vy}lVCANt`YnN(8&=P{AD<$Q#rf>hsc<8S*b1`2!eiWWHyz1dS7stE{ zk`G$t+=r0lu$g;o8hhZxQjO3Bvd$A)PyD1{cmWEXeqe8_ z&_s=uETFNEt*jj=YRVZh9~+`dTi2~OnfzH%XbDJHHALj%NQS6XETS1K=gcXjomf%^ zUR~mBW~~xxOXAcd+U>@>b)HMVtpk-l1%C-jP7yvYvt8nwwAu>dQPC>c4tW(UJ|`cp z=T{!u)t!~63{{X(4sNKPu;lc!Gg5_@esuXaI=(c%Hm|GWIDd;;AXj+`*cEqZMwMuZ z!+3T7m^>ZGmmJcLQTq|ZWqp|Xq#0i3Jsb3$cX+kjEaEgH*K~%2>$ygSX?s@R{rZnq zFx}n8%9kKvTM=ll%KxpsFaKGQio{_^X}J5<7x=Uw!7Os$h8cZ{Je5E{3HeYjj@|`H zb6mzQCg~1LkkHq1Z2 zZ}pw-p6;}eKF}ZmfL0WxE`#C(=FA5~5>aFoeEs%`g3!WFJKU`M9LS}5JGJY#WAD28 z^haUn>3IRwp@O(sV^*Qwqq}V&`qu|Iko|$B7hF|S4N@7imrB~UMsMfiizl4W%!{8&PLd@L0{h)Zr62LYH146iN@sR&uR%26@ z50Nf^aCG)u@->?N|5hA6DCYktjy_PF{C|qG(g+l1#EN2`>o@i=aqp{L7*YCWq(K09 zj851U?mSK+-NU7io;84a`wh2=%E%`frp~I_9fGL@P~=Ws*rzx)q!*4m^p)|%0Y|F5 z0WTCsQG8$W;%iE}>r4XzUCLFn5^=_N8f3{~XG=hd>~<^|YAD&gq7(g}AmKL6>WHlVCPvTul7$?#mVVe@gB zzv=ApE2^QPHi}RW>lfucSIy%0rpi!km?j$B2%ewDl7V?L%H5XgYmV3sJobs1YxW z(%8td1lcDv%fgN0A_mz@a1QrP>)))5Pbt}TPe&*9M@xoJFh|ZYWu9#AqaVqnUf7h- z0!kr{)+UE^!4pwj&7P#G&a88%_T3c#NMPpe(5$pRpY%DmVcs73AoICz^U}ip&tsJg zC3yZtQBITAXaaBw0%XDjlyF4ocK^lpA(+<}6@ECRQaF zux+TWCEaNg1xM2PkhFgS)>8#iM0M;Yp;&9S-ROd}OQ6b&T8a26at*H3ni0WTDBj+h zF3o(Zn&))fd06cC!UrEZd?|+V+5j`+f2`WRR7hLX>K+18Ox==L6p6psNM?kdrsN(k zQGYfw5O7#n^2b*%t+2_|RWzaem|A3*)0P2i7B2EbXZx<|~f6vn(A=^PT zn4<@Pneh=$gb;M8h`8DIxF9#vnPpea^aYb%-bqs@)I#g>?6O!SyifxtfEmq3kesnI z!!}UWJCKsE$E2J_@P3#N8P_U)nZCDc62EJ*xnRP_cH!4|AE~+cXK^0S^51S54hZ}* z>w!ZGZSfD`%nO@gTJ5{DsGhtNV->~O|GX_-0>`6wuitP8<%MgcNcOQ0H{kiY9%{DX z6X}v*2TGk*GbgL`zJl|6BY4n`mA+xEh3_^g^~bZre7=>=3te{&e+b@2CjAg&yN{ETq>6hI!Jem@h62EEWHy={OoRI-+(`4|n=RPml=A**b z=n3lie;tdbHs6`+`;z17+&tvAM4+t5StKPK!EJC9&b}PmjHrzN@e$Ds@nfH|ssrr96hP@Fz73e7zWnI13lnDu4{7qAq@3 z(Dvwp-y;zn)QcPA{CCAvJKEXq9KGqK6e(})kreb1MHK8wL`1v8TF4T`C87CFwV&m< zO5$ro2mNhdRI@PZT51=){39i;%zvv2#%Bl|Pty5ZW1>mwwhh%E+xcb`Be?M(|B38& zNzdQfgC{9Abv~_5&y|$kEtdR#RWHyhC_K;y_=`yATfzxiPXQ*aO9NDwW<(aGAICkB zn#4yXy1ERtd9o_r&7j5#@%(!5%G&w?2E_fMMbPeK|1E#2IcBVBxEn-_YD(hwZ4z5$ zQc)ux3nAwU_nR7xZGmFzEY)WX<{He~rQ(^BI1hQK!E_vo$3QLg%aUi8e-x7SKbTjt z$Z!wtn7VVbH-VZO?=l>}y}zKUQZak~PR^nh>WtjpZNK*?ECil%2S;L_|6i6~r3OfE z^nQZ9je1;fAd6+C8IiX~0$K7B@i=Zb->}%_Mc1a%lm*cN<4-Z_#UA#TkAIVsL`Y&r z5p}~IjY#|l-tywMMyM=&G z5+!8Gtd&F?L9ij(;RnskKQ}csxA;X*?hpjx98;#9JFDUs zc@a&FuhDP*E#w)!iy*~1x*E*^Es<>S0Eu;1G_-B$Ve&huW8WX<^Vxeg>$R}+jpzeE zTImeFx$SZD?eTlIt}8t-%r};SK?cK{VXF6%&v0>U)?|a& zptnB3A4)hs&(6PX!pFqH%DEk19P|D7Nbgg3^nG2<{I8Kn;kLWLfD3)qfh7guy12|b z7Wr&MHc4pR0RO_)a)}qp#?BjMYujhb75wZ6`*r404xE-&4%=vAz;Zy4jjN!a0yP@zwDx?JR$%*~V5x`yh z0l<;10_duts@twtf85=F6@?}Eoin*?bFS-p?#c!1$*a!Hae|WDXBZ3-Wp_j5n3qSz z%to7aDME+UD~F#kVkYl5aGZ2`ngUrPvqq)-XR9iymXa>{vWu~g5|Oj9aTX1F)|(HC-# zdlk1+ycuI}{V~Pb1B0Vzk$oH^jqT8F)$>*ZEImkg#{3;ae!QNHUc94;TK<~(jI)SjoVnW zg~$bFH8X=(EbW$A$xef@lusGj)Va%yHjos8CV=a}ksSObLVL%qK|%hcIZ7m{?iE-5 zlWiBVLF^6Yox*P~f84noM<5tZ{z~eeV&bGur;eE1Gw;DX0^(vlk^_6dd*tY!l3zPi zvZUm(^gE7*Ep~+hJ7<-veeNFknF^$Q+dh{$@kV!BZ{RfFE&H;5cHnS7o*!h$NX6X7SN}EuqU4WGl6c9KACUHvlqbty<6)MY&t9+YQZO4 zcU^3(3wOQ1wEemLWD1xBuJp56e}Z?UBN+%-(t{Ro_T$?Ym(LjAldQ6&5tpSGAzbBu zbR#Twq04GT?CF(_*;7aB^-5*6i2~FdZo|sHd`O3Y5~Ba0;5cY-(%KTDw^G7q5k|f~ zr4e2s*zQs>^djwuppS8w675x-zuJdO8^z97d`?0;{>XlSP4esz%Yvp(| zI<8F)i(lu!@ne2qKj-C=!k5-MaY-0m^6kom(?R8>`znk+*-dUOpIzo-5`S)gKile= zbT|2R>Aa}7y&NNMyNd{)ZL@~pRDI)euQv95Tx>l?k95gEvY9kgg$*e`4e+wDcbu~b zi~Q#9*y{LcxB9!NrmOiA8sfuY00~Tx#^2RtL1OEqv`Rv%)QfXIS?8?M7uNY`!eo}N zt^iK2;CNTfrwRR$^Ic}q7s~hA;I|Z=S?P z8?R(9VjSu@xnSrh0!olRK?hD~jGwv_$vu%&hGbOf@=B-E$DG=*yvA=|R0Sv)POdYW zpV-q%Q}naydC=}b328r@O_E7`oqqSabOvB|n`}d3@!9!x3nd*$NY70W%xdNPXc7;aYPG5w7bpxn9qy>YTAFpp6 z>=$HAjaH*Q3FTNu$kKlDBYu1bAVE7TuO?cLt&Jl>hol}oejT7Um=F-9g7K|~~~!y)Z2AR$?xuU%!N~iZI zKdj8Wf7@IvN#vgtjpAMmmdk;YFRsQN6hepIe|G0b7|#rG$?N?eA!gKJT1LYsBehi2 z%V0V}gy&j5FyEMza}xV6WTR(F)Q)MdTjX|WuNyMa8inwVEox4F%GZgj?u0VGh2b0K zC24766)?ZW^{S1@cZ9jp<@aj>Dfn)4|7KPNUoOX*nGRFk@R)v$s&EhZlM3c%+s^UQ zo|Slm8iq)n<^5t|DQWv*&a97iDsbc7KXl5etx>aN#7bv zKHEEEhr?j&vXR$%YU7OWo7~*M9H0L{_DrHR3{)h#Htc%fL`#L%^L(uO{O^viaVdT2 zkeOl$*pPqBySP|8=kEOjWl}qSukYH;#s!BihuIyk-et-_0cT62_I@!Fe*$~PGvlz3 zV(mGgdWbx1ZDj&Ff*eTakV15dBZ;`gR!->TFn*yek^DAS-(c!}t`A{k$G2Y^p$@V? z|ASrD-wOwP*qdbd5|+jv-h*U0P)RnU#6pj5fKygF?_9DE;cJHmz|m0p8Flxg@@Yqp z?4iW(&!j3}XfiUsKh=z+v+EK7r&%YK=zgi16Dukj(wi!71%yAC&tAU$)- zluJl4{&SMJ+Go|JH-^1qL@ES%>h^6MKDesj%!qfi6N#NJyELV5EfQZTU7bxawN)aQ z0($P3ax9+er7ZBSeUtWNXopc9{s;213C(}_Ve3H2xa04~FIw>r5nSI05gy%IO?s3Z zU%+cvft~B3OhF*d#3FQ`)ttx`}T zF_kCOpNj1I>oWij_M!`_zeE#OS1J09HI`TLp$o+vzX4Vtyt-8v+hK(fr*T2`<&f~h z{3mp9G92f(y{l(@8$Wym<%=Vw7=e1N^gr7~`Q!Wcgfo|7F@7&3Co1{Cf^6N&0eMg^ zk=ANJWsV+&7j^d>M0a5FQUvVNwz_L! z`P*IOwa&$J5D~dGlqWX1efQxoSEzAkomo#lkKbU?O%C#-K;MA>mEorA_4|J^+|auK z6`TGIkK1}hJ%%X?vj!GQoJuQT1Fb?M= zIFw)!Q=~pR`z5S53R3vpg=2zlZuUut{E#9Yug4NlY*2pJZxNX{exVuDubfNjnv!pI zq&Nqk<4w*IK4q;RPiLo+GrIo=kcZEV$d;@pT0ZY(AV;D+o&qo=jO7--Dz8L#Z1JVq zH+9%5&};a%*~E54EsiEwzLZx%8lunjgqdcND&5CQ$eQ`07A2@5mD9_bpWHm|uCio0 zQ;uKOGUWSmUjhU`SqP-@EMRe(siana>#*Uh>O<}=D!@FS04w@E7LN4nApP!G)ciuW zE+O;v-Tr%{}V!Zf?`53&ml>^&x)j%9ji^opI{^N)h zq6rYH-nc=D79-7$`0#BNTe5y;S=`H-hFqK3BHJJI6`NPooZS^1HLj}D880L>G-=jH=oq~HnRT7Idi=z>S|ln z?`@k>Z!pbvso>ZSjuHE3i)<$X?&hkAwQ-s8eM`mRV>GpzgyDxy_M2H=jas~qg155S zKhE?Xy;0wRHYUSOkR&tz6fE~E+XN)Q=Ak6UxnWJ7opl3QZqSfc-J@eszrI#AUJ+*V z?jS@OP@sPqlvgJXKm`PVCyx4C5{veSN7EFS%EP}XcHuYsCueumf`4l{2|m8-DsR7V zWzpktCjIhFYsEQ`&=>uTrjkfuM%Gj{gUo`ZE4k9XSZ^xRVf9uoN)hQvovY8w+%s8A ztjT8q4mY{8i`~HAGy^Ncf z06RPZLNf0+l|TcBfTEt@g?|z*zJ_8PPXe5=T2#T9-^fh^)IG8&_$TEpRU{(-U+lvJ-kSSRgcJXtM-d!feLT+Jm$!KlKiK7Qc z?+qv)th80dcTylF5G6fBdg}H-1u~^6y<2~)bekAqI&b19a^&MB=cKQZ%zlqQUdE>- z?EmNln4;$kk;ys@h**WFnzhCvreHIGnR|pyFUtHiOsCMhV>P+GbkmS+aVWqvpid@S zH@o&ES8H4(%nUsVz{TSKg2gg^b}hDc(0(+`C5GXy@(=&Pru$0&gZ;8%q94(qDk1)q zIH4fB4J<7Kb>s1H3rkF}u)sRt;#)=R^C4;MsMSaL(=S5~nT-E~F{J)MNaRD3eOvW; z24dh;ZprL_T4MX3mMBm_5U9+N059o)2IZ)JpO2%iX?PB@ZV&$VXg-}qWe>h}=?gvT z4IpUlqot;@(T;giT#0n0i7DiP>OTR@XxiWtuK!@t8*2#4|9phHQ8jG?8xJsqM+VHKFi7>eDyc`fBGdR;%C%Eli#6LZ3;p zvFd|-mhoG&cT&9tl8ba_Xzy#jmnx`p=gWX3y@XL&D+^-NrzU0#tP)H(eK zJJAKd_=}h9Y*Az_+w5vD88-RFJ9`j z$`lF6L_KxZA$z}RQ?R2c(Sbg1do;t=RZOa%LFwB?% z(2MiNi;nAu7BhP;Mv5@3{T)%Ae|_k^_Gj$AU`QBDbXEL}SwRtd`(9E`06Hq=T~3H+ z#`p+hr9&k5xzV(Y*9kTR820Z^r2G&I?1Wu@@9TMZXQtt7;QT1V7b$>%%*>Q$SY!tih^beONxFZ0a@7dzB=HWsiZ;o&Z@Sz?P$N@pnRvo&FT6PV}&_(j9}s*{<| z8<-#NT71(IZyI4LlssrJ(o2BrUBC6CX1fQ*j3*}93o7oHoV!tabUg?5|9%><|Gzv8 zd9ffwu8f7)Yg=mq*NM9tFma*POmpcxZ`XSaVyig@zxiSG^2M}QALWJeOZXG_{zbhx zl2WwaJdfHn-MSu9e6MI3pgA+)9}dEPodr(mMU!QW`iBUM0w%qKO{04O>VEgYvZ9=q~A;Z`G z0#(^Gqt(m@jcYmw1q>-RcWupCqXzUI7TuB_0YKcFrsp0O8HSHlrFb!XFxnl21q=|f z8B$DG8hY9IVGIo(e@0kw7q9Ku7+dMR286i>I{Gd0rb53F?)%*O@~ln(-SCL^Kp(#c z$LMqPw0cP-z=oCymDm>p^sj?#IWQ*PNBkcigx`$)y7qc)@?XaVuxDH+#BC}#ALm?P zYk?_){%^uD-qU^RFE}5_F~Eo!J#wOP$lU*tpm9DgF7g;x>+_(0{laA>o-cvILnhAf z)XFd~qO6#G(nlNCsJ-hWFl})zCgI@Lv}MUjQX1p6@E{}sCj#{s5~GS z_dL@e(`ui!7lzyH;t}QmLk^iyR@hL4LWHhugy!}B?i8ZlWf*ONRsI8qs#K)D98*b_ zE|zDO_1fw7TSUf2>b8NELBz+3s1ag?O#UO8{4<#re?2Q628Y}a&6?vZ`wX|EN60a~ zc-XI=)L=Kf;-nFA(n^`QI$(oxDP#SSF&Zf~%ky-ZY$F>rs7?wT7pX2@f8bNkV0PqO z4lHGHX7@ISknH%B!&@(&^#V1n-Q6LD2&84aWulj(^0_L3+zKf7`uF>uA-|h!S>sA7 zOiDVt&!|j$o@|V;an7(qsvUzHayd0*u4M$i&KOvI(T;fzfL{#Yq?kyHmGi^^wBTl0v1lR8 z!|P|SFGbOObAHflCkH>@{k%UQo%7RAIsMm}e#`;Pp@phwpJ>nN`W)77^v zRo)AK?7{pzSqS0}%x5@tV+_SkAPsbc4vKHlHs6gmpU(DiIV~?saZI+VuVv? zc&4uFTH&tPTNCa2)3%R)^^`Y&8#E?@lA$ZTukcrQx;wp2@VN2bv5Mil1q+EbxW}rq zrC2cbQ10$CuCgKIlN6tg=ea1{Ung*|+~>n~Mf>-I!qBK3V}b$}u}rS}m(gNG?;F%kbp zXRKyh%~H^6)dnP(;rCwUzobMp94wD(waV1j<(NkCea~Qct)i0hQtG23`wrc;(ICWQ z%j$y1LjR`WaSf#_R;IT-*@@jlx2M5xW(h^7LwvjTJ~%{ zFHBuqbeU74Bc-wyWaQj*X2Rp zyf@d9+d)lL1A=Q%ekjoJ=of!PEL#$TMT2LaJoQ2(J_mL~hqgX5%PsddQUuOC`ZhW) zL5^YikDQdyUpo_+M);xEi#-#57!UQ$#lmHNK!7auJ7zI61pzMEyB-a^1$V1v8H}^) zTJhU586!3?mIMca`^)Z|Z(FAsqKf%#8KroW_luZdB60;ReL6)RQQ+%UogR(GzDe-o zZV$x8ODoCXwzVXO;OM}`1UVQoIAs&u)UcXnZNhI0>&oxVY$ZvI+ZnP*B2?c3)d%8= zRq)#~yuSXsSV{)8Qna>L;_0OgzVp)KyO_WZ`eO4~E=clN2klJk9=!D!1Pz((#44md z1H)2JU4(~S(ME$yMXoi_Kml{_RjRGfwmKLfC$>T%cLvEMT-R*7zsa5H6rAo}>tQwJh*sjyzSX*`vc@as{x%SzRC^BQeKB zhu_YSi5n|&h1wntdgPBp_kux6d1)6ssIfC-4TxEMo|FSbxc9OsSQ=cgv7T-o^NUe}x1uemr6ExXDAM9E@#$1J9z$~&QEh=`YE^-A7eFNr&jNqOQBZo`= z&##AoS%h^zT%v#YC+xo#z%cT>K?`yPOxKG*3!vdMv)aX=1@O&*(7~I&0q{}68F|nG zxaj?3$`%G%0J6frwk7L~$o2nwoTF!%LIxT7*}w={j51|gU?6*)feojwwFf#h)xNHK z`Pui$Dakh-U})kIrSy?%#_M7{*Xke^&S!h;z1AAa`6dAk8iA6{CwwWtQg3 zqu=1y{bZR!xv#99VK*BOgfmmaz#I}4t=u1n|1g0m>-^Kv=|5_A@iOi$@83p>=Xncx z$^y}u)QrU!zyp;Ih^5IF?i^YHjcQmxG7d;8Jo#6@X@mJmA6rzu&jdBf5dTm8ODayPo(`M_606u+-S!Q8 zUYB+zxsDF*7h4$oHecuV*+;bpbk`h>dCrsuevouuZax^?u6#CflM&lQ Define data, use random, +##-- or do help(data=index) for the standard data sets. + +## The function is currently defined as +function(evalRoot) { + fileAlleleCountStats = paste(evalRoot, ".AlleleCountStats.csv", sep=""); + fileCompOverlap = paste(evalRoot, ".Comp_Overlap.csv", sep=""); + fileCountVariants = paste(evalRoot, ".Count_Variants.csv", sep=""); + fileGenotypeConcordance = paste(evalRoot, ".Genotype_Concordance.csv", sep=""); + fileMetricsByAc = paste(evalRoot, ".MetricsByAc.csv", sep=""); + fileMetricsBySample = paste(evalRoot, ".MetricsBySample.csv", sep=""); + fileQuality_Metrics_by_allele_count = paste(evalRoot, ".Quality_Metrics_by_allele_count.csv", sep=""); + fileQualityScoreHistogram = paste(evalRoot, ".QualityScoreHistogram.csv", sep=""); + fileSampleStatistics = paste(evalRoot, ".Sample_Statistics.csv", sep=""); + fileSampleSummaryStatistics = paste(evalRoot, ".Sample_Summary_Statistics.csv", sep=""); + fileSimpleMetricsBySample = paste(evalRoot, ".SimpleMetricsBySample.csv", sep=""); + fileTi_slash_Tv_Variant_Evaluator = paste(evalRoot, ".Ti_slash_Tv_Variant_Evaluator.csv", sep=""); + fileTiTvStats = paste(evalRoot, ".TiTvStats.csv", sep=""); + fileVariant_Quality_Score = paste(evalRoot, ".Variant_Quality_Score.csv", sep=""); + + eval = list( + AlleleCountStats = NA, + CompOverlap = NA, + CountVariants = NA, + GenotypeConcordance = NA, + MetricsByAc = NA, + MetricsBySample = NA, + Quality_Metrics_by_allele_count = NA, + QualityScoreHistogram = NA, + SampleStatistics = NA, + SampleSummaryStatistics = NA, + SimpleMetricsBySample = NA, + TiTv = NA, + TiTvStats = NA, + Variant_Quality_Score = NA, + + CallsetNames = c(), + CallsetOnlyNames = c(), + CallsetFilteredNames = c() + ); + + eval$AlleleCountStats = .attemptToLoadFile(fileAlleleCountStats); + eval$CompOverlap = .attemptToLoadFile(fileCompOverlap); + eval$CountVariants = .attemptToLoadFile(fileCountVariants); + eval$GenotypeConcordance = .attemptToLoadFile(fileGenotypeConcordance); + eval$MetricsByAc = .attemptToLoadFile(fileMetricsByAc); + eval$MetricsBySample = .attemptToLoadFile(fileMetricsBySample); + eval$Quality_Metrics_by_allele_count = .attemptToLoadFile(fileQuality_Metrics_by_allele_count); + eval$QualityScoreHistogram = .attemptToLoadFile(fileQualityScoreHistogram); + eval$SampleStatistics = .attemptToLoadFile(fileSampleStatistics); + eval$SampleSummaryStatistics = .attemptToLoadFile(fileSampleSummaryStatistics); + eval$SimpleMetricsBySample = .attemptToLoadFile(fileSimpleMetricsBySample); + eval$TiTv = .attemptToLoadFile(fileTi_slash_Tv_Variant_Evaluator); + eval$TiTvStats = .attemptToLoadFile(fileTiTvStats); + eval$Variant_Quality_Score = .attemptToLoadFile(fileVariant_Quality_Score); + + uniqueJexlExpressions = unique(eval$TiTv$jexl_expression); + eval$CallsetOnlyNames = as.vector(uniqueJexlExpressions[grep("FilteredIn|Intersection|none", uniqueJexlExpressions, invert=TRUE, ignore.case=TRUE)]); + eval$CallsetNames = as.vector(gsub("-only", "", eval$CallsetOnlyNames)); + eval$CallsetFilteredNames = as.vector(c()); + eval; + } +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsa.read.gatkreport.Rd b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd new file mode 100644 index 000000000..67c2c7b28 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.gatkreport.Rd @@ -0,0 +1,55 @@ +\name{gsa.read.gatkreport} +\alias{gsa.read.gatkreport} +\title{ +gsa.read.gatkreport +} +\description{ +Reads a GATKReport file - a multi-table document - and loads each table as a separate data.frame object in a list. +} +\usage{ +gsa.read.gatkreport(filename) +} +\arguments{ + \item{filename}{ +The path to the GATKReport file. +} +} +\details{ +The GATKReport format replaces the multi-file output format used by many GATK tools and provides a single, consolidated file format. This format accomodates multiple tables and is still R-loadable - through this function. + +The file format looks like this: +\preformatted{##:GATKReport.v0.1 TableName : The description of the table +col1 col2 col3 +0 0.007451835696110506 25.474613284804366 +1 0.002362777171937477 29.844949954504095 +2 9.087604507451836E-4 32.87590975254731 +3 5.452562704471102E-4 34.498999090081895 +4 9.087604507451836E-4 35.14831665150137 +} + +} +\value{ +Returns a list object, where each key is the TableName and the value is the data.frame object with the contents of the table. If multiple tables with the same name exist, each one after the first will be given names of "TableName.v1", "TableName.v2", ..., "TableName.vN". +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +report = gsa.read.gatkreport("/path/to/my/output.gatkreport"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd new file mode 100644 index 000000000..0a8b37843 --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.squidmetrics.Rd @@ -0,0 +1,48 @@ +\name{gsa.read.squidmetrics} +\alias{gsa.read.squidmetrics} +\title{ +gsa.read.squidmetrics +} +\description{ +Reads metrics for a specified SQUID project into a dataframe. +} +\usage{ +gsa.read.squidmetrics("C315") +} +\arguments{ + \item{project}{ +The project for which metrics should be obtained. +} + \item{bylane}{ +If TRUE, obtains per-lane metrics rather than the default per-sample metrics. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +Returns a data frame with samples (or lanes) as the row and the metric as the column. +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +This method will only work within the Broad Institute internal network. +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Obtain metrics for project C315. +d = gsa.read.squidmetrics("C315"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.read.vcf.Rd b/public/R/src/gsalib/man/gsa.read.vcf.Rd new file mode 100644 index 000000000..cffd35e8f --- /dev/null +++ b/public/R/src/gsalib/man/gsa.read.vcf.Rd @@ -0,0 +1,53 @@ +\name{gsa.read.vcf} +\alias{gsa.read.vcf} +\title{ +gsa.read.vcf +} +\description{ +Reads a VCF file into a table. Optionally expands genotype columns into separate columns containing the genotype, separate from the other fields specified in the FORMAT field. +} +\usage{ +gsa.read.vcf(vcffile, skip=0, nrows=-1, expandGenotypeFields = FALSE) +} +\arguments{ + \item{vcffile}{ +The path to the vcf file. +} + \item{skip}{ +The number of lines of the data file to skip before beginning to read data. +} + \item{nrows}{ +The maximum number of rows to read in. Negative and other invalid values are ignored. +} + \item{expandGenotypeFields}{ +If TRUE, adds an additional column per sample containing just the genotype. +} +} +\details{ +The VCF format is the standard variant call file format used in the GATK. This function reads that data in as a table for easy analysis. +} +\value{ +Returns a data.frame object, where each column corresponds to the columns in the VCF file. +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +vcf = gsa.read.vcf("/path/to/my/output.vcf"); +} +\keyword{ ~kwd1 } diff --git a/public/R/src/gsalib/man/gsa.warn.Rd b/public/R/src/gsalib/man/gsa.warn.Rd new file mode 100644 index 000000000..0b9770b5c --- /dev/null +++ b/public/R/src/gsalib/man/gsa.warn.Rd @@ -0,0 +1,46 @@ +\name{gsa.warn} +\alias{gsa.warn} +\title{ +GSA warn +} +\description{ +Write a warning message to standard out with the prefix '[gsalib] Warning:'. +} +\usage{ +gsa.warn(message) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{message}{ +The warning message to write. +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +Kiran Garimella +} +\note{ +%% ~~further notes~~ +} + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +## Write message to stdout +gsa.warn("This is a warning message"); +} +\keyword{ ~kwd1 } +\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line diff --git a/public/R/src/gsalib/man/gsalib-package.Rd b/public/R/src/gsalib/man/gsalib-package.Rd new file mode 100644 index 000000000..2b8d6db9f --- /dev/null +++ b/public/R/src/gsalib/man/gsalib-package.Rd @@ -0,0 +1,68 @@ +\name{gsalib-package} +\alias{gsalib-package} +\alias{gsalib} +\docType{package} +\title{ +GATK utility analysis functions +} +\description{ +Utility functions for analyzing GATK-processed NGS data +} +\details{ +This package contains functions for working with GATK-processed NGS data. These functions include a command-line parser that also allows a script to be used in interactive mode (good for developing scripts that will eventually be automated), a proportional Venn diagram generator, convenience methods for parsing VariantEval output, and more. +} +\author{ +Genome Sequencing and Analysis Group + +Medical and Population Genetics Program + +Maintainer: Kiran Garimella +} +\references{ +GSA wiki page: http://www.broadinstitute.org/gsa/wiki + +GATK help forum: http://www.getsatisfaction.com/gsa +} +\examples{ +## get script arguments in interactive and non-interactive mode +cmdargs = gsa.getargs( list( + requiredArg1 = list( + value = NA, + doc = "Documentation for requiredArg1" + ), + + optionalArg1 = list( + value = 3e9, + doc = "Documentation for optionalArg1" + ) +) ); + +## plot a proportional Venn diagram +gsa.plot.venn(500, 250, 0, 100); + +## read a GATKReport file +report = gsa.gatk.report("/path/to/my/output.gatkreport"); + +## emit a message +gsa.message("This is a message"); + +## emit a warning message +gsa.message("This is a warning message"); + +## emit an error message +gsa.message("This is an error message"); + +## read the SQUID metrics for a given sequencing project (internal to the Broad only) +s = gsa.read.squidmetrics("C427"); + +## read command-line arguments +cmdargs = gsa.getargs( + list( + file = list(value="/my/test.vcf", doc="VCF file"), + verbose = list(value=0, doc="If 1, set verbose mode"), + test2 = list(value=2.3e9, doc="Another argument that does stuff") + ), + doc="My test program" +); +} +\keyword{ package } From 6ebd83478b45b478535159589ccfa77df634f356 Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Wed, 27 Jul 2011 12:37:00 -0400 Subject: [PATCH 045/186] Fixed build.xml to reflect path changes for gsalib --- build.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.xml b/build.xml index a9348eae3..37fe3c289 100644 --- a/build.xml +++ b/build.xml @@ -1057,7 +1057,7 @@ - + From 60db6cc83621818c61ab4d638d4e51235cd847a9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 27 Jul 2011 12:39:12 -0400 Subject: [PATCH 046/186] Warnings for old ROD system use. Removed unused class GATKRODFeature --- .../sting/gatk/CommandLineExecutable.java | 17 ++++++++ .../sting/gatk/refdata/utils/GATKFeature.java | 40 ------------------- 2 files changed, 17 insertions(+), 40 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index ec3c96d83..035ce1cbc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk; +import org.apache.log4j.Logger; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; @@ -66,6 +67,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ private final Collection argumentSources = new ArrayList(); + protected static Logger logger = Logger.getLogger(CommandLineExecutable.class); + /** * this is the function that the inheriting class can expect to have called * when the command line system has initialized. @@ -98,6 +101,20 @@ public abstract class CommandLineExecutable extends CommandLineProgram { argumentSources.add(walker); Collection newStyle = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser); + + // todo: remove me when the old style system is removed + if ( getArgumentCollection().RODBindings.size() > 0 ) { + logger.warn("################################################################################"); + logger.warn("################################################################################"); + logger.warn("Deprecated -B rod binding syntax detected. This syntax will be retired in GATK 1.2."); + logger.warn("Please use arguments defined by each specific walker instead."); + for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) { + logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags()); + } + logger.warn("################################################################################"); + logger.warn("################################################################################"); + } + Collection oldStyle = ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings, getArgumentCollection().DBSNPFile, parser); oldStyle.addAll(newStyle); engine.setReferenceMetaDataFiles(oldStyle); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index 59e8471a3..ba00eb431 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -102,44 +102,4 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation { return feature; } } - - /** - * wrapping a old style rod into the new GATK feature style - */ - public static class RODGATKFeature extends GATKFeature { - - // our data - private ReferenceOrderedDatum datum; - - public RODGATKFeature(ReferenceOrderedDatum datum) { - super(datum.getName()); - this.datum = datum; - } - - @Override - public GenomeLoc getLocation() { - return datum.getLocation(); - } - - @Override - public Object getUnderlyingObject() { - return datum; - } - - @Override - public String getChr() { - return datum.getLocation().getContig(); - } - - @Override - public int getStart() { - return (int)datum.getLocation().getStart(); - } - - @Override - public int getEnd() { - return (int)datum.getLocation().getStop(); - } - } - } From 64aad67b5fe0388a31c6acc4e198beeffd5f122f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 27 Jul 2011 16:13:45 -0400 Subject: [PATCH 047/186] Fixing dbSNP adaptor for complex indels (wasn) --- .../sting/gatk/refdata/VariantContextAdaptors.java | 6 ++++-- .../sting/gatk/refdata/utils/helpers/DbSNPHelper.java | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 1d622e2c7..dedd2f26e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -122,14 +122,16 @@ public class VariantContextAdaptors { Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); - if ( DbSNPHelper.isDeletion(dbsnp) ) { + + boolean vcIsDeletion = DbSNPHelper.isDeletion(dbsnp) || DbSNPHelper.isComplexIndel(dbsnp); + if ( vcIsDeletion ) { int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); } Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(),dbsnp.getStart() - (DbSNPHelper.isDeletion(dbsnp) ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (vcIsDeletion ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); return vc; } else return null; // can't handle anything else diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java index 3201769e0..35b0f73c6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/helpers/DbSNPHelper.java @@ -117,7 +117,11 @@ public class DbSNPHelper { } public static boolean isIndel(DbSNPFeature feature) { - return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || feature.getVariantType().contains("in-del"); + return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature); + } + + public static boolean isComplexIndel(DbSNPFeature feature) { + return feature.getVariantType().contains("in-del"); } public static boolean isHapmap(DbSNPFeature feature) { From f3ad4ec94b2d5f12fd96cb3f4a0583d32aee652b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 27 Jul 2011 22:06:23 -0400 Subject: [PATCH 049/186] Removed annoying FastaSequenceIndexBuilderProgressListener infrastructure that was just a boolean switch on whether to print progress or not. --- .../picard/reference/FastaSequenceIndexBuilder.java | 13 ++++++------- .../sting/gatk/GenomeAnalysisEngine.java | 3 --- .../datasources/reference/ReferenceDataSource.java | 13 ++----------- .../FastaSequenceIndexBuilderUnitTest.java | 10 ++++------ 4 files changed, 12 insertions(+), 27 deletions(-) diff --git a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java index 8825c3767..6c8fe1834 100644 --- a/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java +++ b/public/java/src/net/sf/picard/reference/FastaSequenceIndexBuilder.java @@ -25,7 +25,6 @@ package net.sf.picard.reference; -import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*; @@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException; * Produces fai file with same output as samtools faidx */ public class FastaSequenceIndexBuilder { - public File fastaFile; - ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file + final public File fastaFile; + final boolean printProgress; // keep track of location in file long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file; @@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder { public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT } Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum? - public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) { - this.progress = progress; + public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) { this.fastaFile = fastaFile; fileLength = fastaFile.length(); + this.printProgress = printProgress; } /** @@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder { if (System.currentTimeMillis() - lastTimestamp > 10000) { int percentProgress = (int) (100*bytesRead/fileLength); - if (progress != null) - progress.percentProgress(percentProgress); + if (printProgress) + System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress)); lastTimestamp = System.currentTimeMillis(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index a414d24aa..c5b2a840c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -926,9 +926,6 @@ public class GenomeAnalysisEngine { GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType); - // try and make the tracks given their requests - // create of live instances of the tracks - List tracks = new ArrayList(); List dataSources = new ArrayList(); for (RMDTriplet fileDescriptor : referenceMetaDataFiles) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java index ef69a8e5f..c8c79bb14 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java @@ -41,7 +41,7 @@ import java.io.File; * Loads reference data from fasta file * Looks for fai and dict files, and tries to create them if they don't exist */ -public class ReferenceDataSource implements ReferenceDataSourceProgressListener { +public class ReferenceDataSource { private IndexedFastaSequenceFile index; /** our log, which we want to capture anything from this class */ @@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener // get exclusive lock if (!indexLock.exclusiveLock()) throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile); - FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this); + FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true); FastaSequenceIndex sequenceIndex = faiBuilder.createIndex(); FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile); } @@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener public IndexedFastaSequenceFile getReference() { return this.index; } - - /** - * Notify user of progress in creating fai file - * @param percent Percent of fasta file read as a percent - */ - public void percentProgress(int percent) { - System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent)); - } - } diff --git a/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java b/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java index 27b76537f..cf0f9051e 100644 --- a/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java +++ b/public/java/test/net/sf/picard/reference/FastaSequenceIndexBuilderUnitTest.java @@ -27,7 +27,6 @@ package net.sf.picard.reference; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -40,7 +39,6 @@ import java.io.FileNotFoundException; public class FastaSequenceIndexBuilderUnitTest extends BaseTest { private FastaSequenceIndexBuilder builder; - private ReferenceDataSourceProgressListener progress; private File fastaFile; private FastaSequenceIndex controlIndex; @@ -58,7 +56,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { logger.warn("Executing unixFileTest"); fastaFile = new File(validationDataLocation + "exampleFASTA.fasta"); - builder = new FastaSequenceIndexBuilder(fastaFile, progress); + builder = new FastaSequenceIndexBuilder(fastaFile, false); FastaSequenceIndex index = builder.createIndex(); controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0)); @@ -75,7 +73,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { logger.warn("Executing windowsFileTest"); fastaFile = new File(validationDataLocation + "exampleFASTA-windows.fasta"); - builder = new FastaSequenceIndexBuilder(fastaFile, progress); + builder = new FastaSequenceIndexBuilder(fastaFile, false); FastaSequenceIndex index = builder.createIndex(); controlIndex.add(new FastaSequenceIndexEntry("chr2", 7, 29, 7, 9,0)); @@ -91,7 +89,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { logger.warn("Executing combinedWindowsUnix"); fastaFile = new File(validationDataLocation + "exampleFASTA-combined.fasta"); - builder = new FastaSequenceIndexBuilder(fastaFile, progress); + builder = new FastaSequenceIndexBuilder(fastaFile, false); FastaSequenceIndex index = builder.createIndex(); controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 100000, 60, 61,0)); controlIndex.add(new FastaSequenceIndexEntry("chr2", 101680, 29, 7, 9,1)); @@ -108,7 +106,7 @@ public class FastaSequenceIndexBuilderUnitTest extends BaseTest { logger.warn("Executing threeVariableLengthContigs"); fastaFile = new File(validationDataLocation + "exampleFASTA-3contigs.fasta"); - builder = new FastaSequenceIndexBuilder(fastaFile, progress); + builder = new FastaSequenceIndexBuilder(fastaFile, false); FastaSequenceIndex index = builder.createIndex(); controlIndex.add(new FastaSequenceIndexEntry("chr1", 6, 17, 5, 6,0)); controlIndex.add(new FastaSequenceIndexEntry("chr2", 35, 21, 7, 8,1)); From 6230315ff2ae7e7a000d064d98b403d7ef6383ec Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 27 Jul 2011 22:51:21 -0400 Subject: [PATCH 050/186] Along with my half-written commit message from earlier, I also forgot to commit the integration test updates. This is what happens when you try to do things 30 seconds before you leave for the day. To finish up from before: complex events weren't being padded with the reference base as per the VCF spec. They are now. --- .../VariantContextIntegrationTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index a344817a0..ced2bf00b 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -19,14 +19,14 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); + expectations.put("-L 1:1-10000 --printPerLocus", "e9d96677a57bc3a10fb6d9ba942c19f0"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "8a1174d2b18b98e624abbe93e6af8fdd"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "3933f1fae5453c54c3f791a23de07599"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "c9cf2f01bf045a58dcc7649fd6ea2396"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "a103d856e8bc558c949c6e3f184e8913"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5f2265ac6c6d80d64dc6e69a05c1250b"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "06a3ae4c0afa23b429a9491ab7707f3c"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); } @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("529f936aa6c303658b23caf4e527782f")); + Arrays.asList("2532234d2c934a5e14849655dd7b5f4f")); executeTest("testLargeScaleConversion", spec); } } From c83f9432ebb8455661d56092775a68cabb93baec Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 27 Jul 2011 23:25:52 -0400 Subject: [PATCH 051/186] Cleaned up RefMetaDataTracker Renamed many functions to more clearly state what they are actually doing Removed unnecessary / unused functionality, reducing interface complexity Updated all uses of this code in GATK Added generic, type-safe accessors to RefMetaDataTracker such as public List getValues(final String name, Class clazz) Added standard refMetaDataTracker accessors to RodBinding, so you can do everything you can for generic rods with the tracker directly with with the RodBinding --- .../sting/commandline/RodBinding.java | 25 ++- .../commandline/VariantContextRodBinding.java | 24 +++ .../datasources/providers/RodLocusView.java | 2 +- .../ReferenceDataSourceProgressListener.java | 30 ---- .../gatk/refdata/RefMetaDataTracker.java | 151 ++++++------------ .../sting/gatk/refdata/utils/GATKFeature.java | 2 + .../sting/gatk/walkers/PileupWalker.java | 4 +- .../sting/gatk/walkers/PrintRODsWalker.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 4 +- .../genomicannotator/GenomicAnnotation.java | 2 +- .../TranscriptToGenomicInfo.java | 2 +- .../beagle/BeagleOutputToVCFWalker.java | 6 +- .../coverage/CompareCallableLociWalker.java | 2 +- .../filters/VariantFiltrationWalker.java | 2 +- .../walkers/phasing/AnnotateMNPsWalker.java | 2 +- .../sting/gatk/walkers/qc/CountIntervals.java | 2 +- .../walkers/qc/RodSystemValidationWalker.java | 4 +- .../walkers/qc/ValidatingPileupWalker.java | 2 +- .../recalibration/CountCovariatesWalker.java | 2 +- .../validation/ValidationAmplicons.java | 9 +- .../varianteval/stratifications/Novelty.java | 2 +- .../variantutils/ValidateVariants.java | 4 +- .../walkers/variantutils/VariantsToVCF.java | 6 +- .../ReferenceOrderedViewUnitTest.java | 8 +- 24 files changed, 134 insertions(+), 165 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index 86b1be162..8ad92e2d1 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -25,8 +25,11 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.List; +import java.util.*; /** * @@ -50,8 +53,24 @@ public class RodBinding { return source; } - public List getAll(RefMetaDataTracker tracker) { - return tracker.getReferenceMetaData(variableName); + public List getValues(RefMetaDataTracker tracker) { + return tracker.getValues(variableName); + } + + public List getValues(RefMetaDataTracker tracker, Class clazz) { + return tracker.getValues(variableName, clazz); + } + + public T getFirstValue(RefMetaDataTracker tracker, Class clazz) { + return tracker.getFirstValue(variableName, clazz); + } + + public boolean hasValues(RefMetaDataTracker tracker) { + return tracker.hasValues(variableName); + } + + public List getValuesAsGATKFeatures(RefMetaDataTracker tracker) { + return tracker.getValuesAsGATKFeatures(variableName); } public Tags getTags() { diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java index 29b97d07b..a9ce824f8 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -43,4 +43,28 @@ public class VariantContextRodBinding extends RodBinding { public VariantContext getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { return tracker.getVariantContext(ref, variableName, loc); } + +// public Collection getAllVariantContexts(ReferenceContext ref) { +// } +// +// public Collection getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) { +// } +// +// public Collection getAllVariantContexts(ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { +// } +// +// public Collection getVariantContexts(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { +// } +// +// public Collection getVariantContexts(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { +// } +// +// public Collection getVariantContextsByPrefix(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { +// } +// +// public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) { +// } +// +// public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) { +// } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 39c632539..50c10c26e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -140,7 +140,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { private RefMetaDataTracker createTracker( Collection allTracksHere ) { RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size()); for ( RODRecordList track : allTracksHere ) { - if ( ! t.hasROD(track.getName()) ) + if ( ! t.hasValues(track.getName()) ) t.bind(track.getName(), track); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java deleted file mode 100644 index 8dace8fe4..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSourceProgressListener.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.datasources.reference; - -public interface ReferenceDataSourceProgressListener { - public void percentProgress(int percent); -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index d03b122e2..926158a36 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -38,61 +38,45 @@ public class RefMetaDataTracker { map = new HashMap(nBindings); } + /** + * No-assumption version of getValues(name, class). Returns Objects. + */ + public List getValues(final String name) { + return getValues(name, Object.class); + } + /** * get all the reference meta data associated with a track name. * @param name the name of the track we're looking for + * @param clazz the expected class of the elements bound to rod name * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a * dbSNP RMD this will be a RodDbSNP, etc. * * Important: The list returned by this function is guaranteed not to be null, but may be empty! */ - public List getReferenceMetaData(final String name) { - RODRecordList list = getTrackDataByName(name, true); - List objects = new ArrayList(); - if (list == null) return objects; - for (GATKFeature feature : list) - objects.add(feature.getUnderlyingObject()); - return objects; - } + public List getValues(final String name, Class clazz) { + RODRecordList list = getTrackDataByName(name); - /** - * get all the reference meta data associated with a track name. - * @param name the name of the track we're looking for - * @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with - * the passed in parameter (false). - * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a - * dbSNP rod this will be a RodDbSNP, etc. - * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! - */ - public List getReferenceMetaData(final String name, boolean requireExactMatch) { - RODRecordList list = getTrackDataByName(name, requireExactMatch); - List objects = new ArrayList(); - if (list == null) return objects; - for (GATKFeature feature : list) - objects.add(feature.getUnderlyingObject()); - return objects; - } - - /** - * get all the GATK features associated with a specific track name - * @param name the name of the track we're looking for - * @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with - * the passed in parameter (false). - * @return a list of GATKFeatures for the target rmd - * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! - */ - public List getGATKFeatureMetaData(final String name, boolean requireExactMatch) { - List feat = getTrackDataByName(name,requireExactMatch); - return (feat == null) ? new ArrayList() : feat; // to satisfy the above requirement that we don't return null + if (list == null) + return Collections.emptyList(); + else { + List objects = new ArrayList(); + for (GATKFeature feature : list) { + final Object obj = feature.getUnderlyingObject(); + if (!(clazz.isAssignableFrom(obj.getClass()))) + throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() + + " it's of type " + obj.getClass()); + objects.add((T)obj); + } + return objects; + } } /** * get a singleton record, given the name and a type. This function will return the first record at the current position seen, * and emit a logger warning if there were more than one option. * - * WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are + * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets * picked may change from time to time! BE WARNED! * @@ -101,22 +85,18 @@ public class RefMetaDataTracker { * @param the type to parameterize on, matching the clazz argument * @return a record of type T, or null if no record is present. */ - @Deprecated - public T lookup(final String name, Class clazz) { - RODRecordList objects = getTrackDataByName(name, true); + public T getFirstValue(final String name, Class clazz) { + RODRecordList objects = getTrackDataByName(name); - // if emtpy or null return null; + // if empty or null return null; if (objects == null || objects.size() < 1) return null; - if (objects.size() > 1) - logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options"); - Object obj = objects.get(0).getUnderlyingObject(); if (!(clazz.isAssignableFrom(obj.getClass()))) throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() + " it's of type " + obj.getClass()); - - return (T)obj; + else + return (T)obj; } /** @@ -125,7 +105,7 @@ public class RefMetaDataTracker { * @param name the name of the rod * @return true if it has the rod */ - public boolean hasROD(final String name) { + public boolean hasValues(final String name) { return map.containsKey(canonicalName(name)); } @@ -136,14 +116,25 @@ public class RefMetaDataTracker { * * @return collection of all rods */ - public Collection getAllRods() { + public Collection getAllValuesAsGATKFeatures() { List l = new ArrayList(); for ( RODRecordList rl : map.values() ) { - if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether? - l.addAll(rl); + if ( rl != null ) + l.addAll(rl); } return l; + } + /** + * get all the GATK features associated with a specific track name + * @param name the name of the track we're looking for + * @return a list of GATKFeatures for the target rmd + * + * Important: The list returned by this function is guaranteed not to be null, but may be empty! + */ + public List getValuesAsGATKFeatures(final String name) { + List feat = getTrackDataByName(name); + return (feat == null) ? new ArrayList() : feat; // to satisfy the above requirement that we don't return null } /** @@ -163,23 +154,16 @@ public class RefMetaDataTracker { } /** - * @return the number of ROD bindings (name -> value) where value is not empty in this tracker + * The number of tracks with at least one value bound here + * @return */ - public int getNBoundRodTracks() { - return getNBoundRodTracks(null); - } - - public int getNBoundRodTracks(final String excludeIn ) { - final String exclude = excludeIn == null ? null : canonicalName(excludeIn); - + public int getNumberOfTracksWithValue() { int n = 0; for ( RODRecordList value : map.values() ) { if ( value != null && ! value.isEmpty() ) { - if ( exclude == null || ! value.getName().equals(exclude) ) - n++; + n++; } } - return n; } @@ -276,20 +260,7 @@ public class RefMetaDataTracker { Collection contexts = new ArrayList(); for ( String name : names ) { - RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match - - if ( rodList != null ) - addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); - } - - return contexts; - } - - public Collection getVariantContextsByPrefix(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - Collection contexts = new ArrayList(); - - for ( String name : names ) { - RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match + RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match if ( rodList != null ) addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); @@ -332,7 +303,6 @@ public class RefMetaDataTracker { return getVariantContext(ref, name, null, curLocation, true); } - private void addVariantContexts(Collection contexts, RODRecordList rodList, ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { @@ -367,29 +337,11 @@ public class RefMetaDataTracker { * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * regardless of the presence of "extended" RODs overlapping with that location). * @param name track name - * @param requireExactMatch do we require an exact match of the rod name? * @return track data for the given rod */ - private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) { - //logger.debug(String.format("Lookup %s%n", name)); - + private RODRecordList getTrackDataByName(final String name) { final String luName = canonicalName(name); - RODRecordList trackData = null; - - if ( requireExactMatch ) { - if ( map.containsKey(luName) ) - trackData = map.get(luName); - } else { - for ( Map.Entry datum : map.entrySet() ) { - final String rodName = datum.getKey(); - if ( datum.getValue() != null && rodName.startsWith(luName) ) { - if ( trackData == null ) trackData = new RODRecordListImpl(name); - //System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation()); - ((RODRecordListImpl)trackData).add(datum.getValue(), true); - } - } - } - return trackData; + return map.get(luName); } /** @@ -398,6 +350,7 @@ public class RefMetaDataTracker { * @return canonical name of the rod */ private final String canonicalName(final String name) { + // todo -- remove me after switch to RodBinding syntax return name.toLowerCase(); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java index ba00eb431..6f8c9680f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/utils/GATKFeature.java @@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation { public abstract GenomeLoc getLocation(); + // TODO: this should be a Feature public abstract Object getUnderlyingObject(); /** @@ -98,6 +99,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation { return feature.getEnd(); } + // TODO: this should be a Feature, actually public Object getUnderlyingObject() { return feature; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 5db4fb417..1484841b3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -112,14 +112,14 @@ public class PileupWalker extends LocusWalker implements TreeR */ private String getReferenceOrderedData( RefMetaDataTracker tracker ) { ArrayList rodStrings = new ArrayList(); - for ( GATKFeature datum : tracker.getAllRods() ) { + for ( GATKFeature datum : tracker.getAllValuesAsGATKFeatures() ) { if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) { rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it } } String rodString = Utils.join(", ", rodStrings); - DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class); + DbSNPFeature dbsnp = tracker.getFirstValue(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class); if ( dbsnp != null) rodString += DbSNPHelper.toMediumString(dbsnp); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 158992a22..88fdd0f69 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -61,7 +61,7 @@ public class PrintRODsWalker extends RodWalker { if ( tracker == null ) return 0; - Iterator rods = tracker.getAllRods().iterator(); + Iterator rods = tracker.getAllValuesAsGATKFeatures().iterator(); while ( rods.hasNext() ) { Object rod = rods.next().getUnderlyingObject(); if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index f9b1563b4..3cf96d443 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -194,9 +194,9 @@ public class VariantAnnotatorEngine { String rsID = null; if (vc.isSNP()) - rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); else if (vc.isIndel()) - rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null ); // annotate dbsnp id if available and not already there if ( rsID != null && (!vc.hasID() || vc.getID().equals(VCFConstants.EMPTY_ID_FIELD)) ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java index 0e8360484..1dbc09b94 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotation.java @@ -146,7 +146,7 @@ public class GenomicAnnotation extends InfoFieldAnnotation { //iterate over each record that overlaps the current locus, and, if it passes certain filters, //add its values to the list of annotations for this locus. final Map annotations = new HashMap(); - for(final GATKFeature gatkFeature : tracker.getAllRods()) + for(final GATKFeature gatkFeature : tracker.getAllValuesAsGATKFeatures()) { final String name = gatkFeature.getName(); if( name.equals("variant") || name.equals("interval") ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 0bbfa51b4..dbf051ac0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -213,7 +213,7 @@ public class TranscriptToGenomicInfo extends RodWalker { if ( rods.size() == 0 ) return 0; - final List transcriptRODs = tracker.getReferenceMetaData(ROD_NAME); + final List transcriptRODs = tracker.getValues(ROD_NAME); //there may be multiple transcriptRODs that overlap this locus for ( Object transcriptRodObject : transcriptRODs ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 21c8ec430..0720e5a16 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -130,7 +130,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { vcfWriter.add(vc_input, ref.getBase()); return 1; } - List r2rods = tracker.getReferenceMetaData(R2_ROD_NAME); + List r2rods = tracker.getValues(R2_ROD_NAME); // ignore places where we don't have a variant if ( r2rods.size() == 0 ) @@ -138,7 +138,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { BeagleFeature beagleR2Feature = (BeagleFeature)r2rods.get(0); - List gProbsrods = tracker.getReferenceMetaData(PROBS_ROD_NAME); + List gProbsrods = tracker.getValues(PROBS_ROD_NAME); // ignore places where we don't have a variant if ( gProbsrods.size() == 0 ) @@ -146,7 +146,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { BeagleFeature beagleProbsFeature = (BeagleFeature)gProbsrods.get(0); - List gPhasedrods = tracker.getReferenceMetaData(PHASED_ROD_NAME); + List gPhasedrods = tracker.getValues(PHASED_ROD_NAME); // ignore places where we don't have a variant if ( gPhasedrods.size() == 0 ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index 6b91b0198..79de45d16 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -92,7 +92,7 @@ public class CompareCallableLociWalker extends RodWalker bindings = tracker.getReferenceMetaData(track); + List bindings = tracker.getValues(track); if ( bindings.size() != 1 || ! (bindings.get(0) instanceof FullBEDFeature)) { throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", track)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 6c023573a..eb62190fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -152,7 +152,7 @@ public class VariantFiltrationWalker extends RodWalker { Collection VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false); // is there a SNP mask present? - boolean hasMask = tracker.getReferenceMetaData("mask").size() > 0; + boolean hasMask = tracker.getValues("mask").size() > 0; if ( hasMask ) previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 9aa370d3f..22672b7a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -172,7 +172,7 @@ public class AnnotateMNPsWalker extends RodWalker { } GenomeLoc stopLoc = locParser.createGenomeLoc(curLocus.getContig(), vcLoc.getStop()); - final List refSeqRODs = tracker.getReferenceMetaData(REFSEQ_ROD_NAME); + final List refSeqRODs = tracker.getValues(REFSEQ_ROD_NAME); for (Object refSeqObject : refSeqRODs) { AnnotatorInputTableFeature refSeqAnnotation = (AnnotatorInputTableFeature) refSeqObject; locusToRefSeqFeatures.putLocusFeatures(curLocus, refSeqAnnotation, stopLoc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index 2bdd4558f..b4e13f879 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -36,7 +36,7 @@ public class CountIntervals extends RefWalker { return null; } - List checkIntervals = tracker.getGATKFeatureMetaData("check",false); + List checkIntervals = tracker.getValues("check"); return (long) checkIntervals.size(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java index 170630b77..5da5fefb8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -73,9 +73,9 @@ public class RodSystemValidationWalker extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { int ret = 0; - if (tracker != null && tracker.getAllRods().size() > 0) { + if (tracker != null && tracker.getAllValuesAsGATKFeatures().size() > 0) { out.print(context.getLocation() + DIVIDER); - Collection features = tracker.getAllRods(); + Collection features = tracker.getAllValuesAsGATKFeatures(); for (GATKFeature feat : features) out.print(feat.getName() + DIVIDER); out.println(";"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index e1e6c4b69..0054354c7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -130,7 +130,7 @@ public class ValidatingPileupWalker extends LocusWalker { } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null || ! tracker.hasROD("ProbeIntervals")) { return null; } + if ( tracker == null || ! tracker.hasValues("ProbeIntervals")) { return null; } - GenomeLoc interval = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getLocation(); + TableFeature feature = tracker.getFirstValue("ProbeIntervals", TableFeature.class); + GenomeLoc interval = feature.getLocation(); //logger.debug(interval); if ( prevInterval == null || ! interval.equals(prevInterval) ) { // we're in a new interval, we should: @@ -129,8 +130,8 @@ public class ValidationAmplicons extends RodWalker { rawSequence = new StringBuilder(); sequenceInvalid = false; invReason = new LinkedList(); - logger.debug(Utils.join("\t",((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getAllValues())); - probeName = ((TableFeature) tracker.getReferenceMetaData("ProbeIntervals",true).get(0)).getValue(1); + logger.debug(Utils.join("\t",feature.getAllValues())); + probeName = feature.getValue(1); indelCounter = 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index a0973a088..1b302425d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -34,7 +34,7 @@ public class Novelty extends VariantStratifier implements StandardStratification if (tracker != null) { for (String knownName : knownNames) { - if (tracker.hasROD(knownName)) { + if (tracker.hasValues(knownName)) { EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); if (eval != null) { allowableTypes.add(eval.getType()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 044fc6533..5e779097a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -142,8 +142,8 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; - if ( tracker.hasROD(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - List dbsnpList = tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME); + if ( tracker.hasValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { + List dbsnpList = tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME); rsIDs = new HashSet(); for ( Object d : dbsnpList ) { if (d instanceof DbSNPFeature ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 37fd0d547..4f5a5652f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -85,7 +85,7 @@ public class VariantsToVCF extends RodWalker { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); Collection contexts = getVariantContexts(tracker, ref); @@ -112,7 +112,7 @@ public class VariantsToVCF extends RodWalker { private Collection getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) { // we need to special case the HapMap format because indels aren't handled correctly - List features = tracker.getReferenceMetaData(INPUT_ROD_NAME, true); + List features = tracker.getValues(INPUT_ROD_NAME); if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) { ArrayList hapmapVCs = new ArrayList(features.size()); for ( Object feature : features ) { @@ -217,7 +217,7 @@ public class VariantsToVCF extends RodWalker { samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME)); if ( samples.isEmpty() ) { - List rods = tracker.getReferenceMetaData(INPUT_ROD_NAME); + List rods = tracker.getValues(INPUT_ROD_NAME); if ( rods.size() == 0 ) throw new IllegalStateException("No rod data is present"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index cb156b682..21be24a85 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -70,7 +70,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10)); - Assert.assertEquals(tracker.getAllRods().size(), 0, "The tracker should not have produced any data"); + Assert.assertEquals(tracker.getAllValuesAsGATKFeatures().size(), 0, "The tracker should not have produced any data"); } /** @@ -88,7 +88,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); - TableFeature datum = tracker.lookup("tableTest",TableFeature.class); + TableFeature datum = tracker.getFirstValue("tableTest", TableFeature.class); Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect"); Assert.assertEquals(datum.get("COL2"),"D","datum parameter for COL2 is incorrect"); @@ -114,13 +114,13 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); - TableFeature datum1 = tracker.lookup("tableTest1",TableFeature.class); + TableFeature datum1 = tracker.getFirstValue("tableTest1", TableFeature.class); Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect"); Assert.assertEquals(datum1.get("COL2"),"D","datum1 parameter for COL2 is incorrect"); Assert.assertEquals(datum1.get("COL3"),"E","datum1 parameter for COL3 is incorrect"); - TableFeature datum2 = tracker.lookup("tableTest2", TableFeature.class); + TableFeature datum2 = tracker.getFirstValue("tableTest2", TableFeature.class); Assert.assertEquals(datum2.get("COL1"),"C","datum2 parameter for COL1 is incorrect"); Assert.assertEquals(datum2.get("COL2"),"D","datum2 parameter for COL2 is incorrect"); From f7a126722b0fca143386135ff4789390082e8bec Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 28 Jul 2011 00:16:34 -0400 Subject: [PATCH 052/186] Cleaned up VariantContext accessors in RefMetaDataTracker It's no longer possible to provided allowed types, as this was a very rarely used feature in the engine. These get methods have been removed and local uses replaced with tests directly in their code. This simplified the RefMetaDataTracker significantly VariantContextRodBinding now forwards along all of the RefMetaDataTracker methods, so it is possible to create a full equivalent VariantContextRodBinding now as a walker field variable. All walkers updated to the new RefMetaDataTracker function call style --- .../sting/commandline/RodBinding.java | 16 ++-- .../commandline/VariantContextRodBinding.java | 68 ++++++++------ .../gatk/refdata/RefMetaDataTracker.java | 90 +++++++++++-------- .../walkers/annotator/VariantAnnotator.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 4 +- .../genomicannotator/GenomicAnnotator.java | 2 +- .../beagle/BeagleOutputToVCFWalker.java | 4 +- .../beagle/ProduceBeagleInputWalker.java | 4 +- .../VariantsToBeagleUnphasedWalker.java | 2 +- .../filters/VariantFiltrationWalker.java | 2 +- ...elGenotypeLikelihoodsCalculationModel.java | 12 +-- ...NPGenotypeLikelihoodsCalculationModel.java | 2 +- .../walkers/genotyper/UGCallVariants.java | 2 +- .../walkers/phasing/AnnotateMNPsWalker.java | 2 +- .../phasing/MergeAndMatchHaplotypes.java | 4 +- .../gatk/walkers/phasing/MergeMNPsWalker.java | 2 +- ...ergeSegregatingAlternateAllelesWalker.java | 2 +- .../walkers/phasing/PhaseByTransmission.java | 2 +- .../phasing/ReadBackedPhasingWalker.java | 2 +- .../varianteval/stratifications/Novelty.java | 9 +- .../varianteval/util/VariantEvalUtils.java | 27 +----- .../ApplyRecalibration.java | 2 +- .../VariantDataManager.java | 2 +- .../VariantRecalibrator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../variantutils/RandomlySplitVariants.java | 2 +- .../walkers/variantutils/SelectVariants.java | 6 +- .../variantutils/ValidateVariants.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 30 ++++--- 32 files changed, 163 insertions(+), 151 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index 8ad92e2d1..ec2117127 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -26,13 +26,13 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.utils.exceptions.UserException; -import java.util.*; +import java.util.List; /** + * A RodBinding representing a walker argument that gets bound to a ROD track. * + * There is no constraint on the type of the ROD bound. */ public class RodBinding { final String variableName; @@ -53,23 +53,23 @@ public class RodBinding { return source; } - public List getValues(RefMetaDataTracker tracker) { + public List getValues(final RefMetaDataTracker tracker) { return tracker.getValues(variableName); } - public List getValues(RefMetaDataTracker tracker, Class clazz) { + public List getValues(final RefMetaDataTracker tracker, final Class clazz) { return tracker.getValues(variableName, clazz); } - public T getFirstValue(RefMetaDataTracker tracker, Class clazz) { + public T getFirstValue(final RefMetaDataTracker tracker, final Class clazz) { return tracker.getFirstValue(variableName, clazz); } - public boolean hasValues(RefMetaDataTracker tracker) { + public boolean hasValues(final RefMetaDataTracker tracker) { return tracker.hasValues(variableName); } - public List getValuesAsGATKFeatures(RefMetaDataTracker tracker) { + public List getValuesAsGATKFeatures(final RefMetaDataTracker tracker) { return tracker.getValuesAsGATKFeatures(variableName); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java index a9ce824f8..f5e29986e 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -29,42 +29,54 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.File; -import java.util.List; +import java.util.Collection; /** - * + * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts */ public class VariantContextRodBinding extends RodBinding { + /** + * Create a new RodBinding specialized to provide VariantContexts. + * @param variableName the name of the field in the walker that we will bind the ROD track too + * @param sourceFile the data source from which we will read the VCs + * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type + */ protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { super(variableName, sourceFile, parser); } - public VariantContext getVariantContext(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc) { - return tracker.getVariantContext(ref, variableName, loc); + /** + * Forwarding method to identical tracker method + */ + public Collection getVariantContexts(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { + return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); } -// public Collection getAllVariantContexts(ReferenceContext ref) { -// } -// -// public Collection getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) { -// } -// -// public Collection getAllVariantContexts(ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// } -// -// public Collection getVariantContexts(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// } -// -// public Collection getVariantContexts(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// } -// -// public Collection getVariantContextsByPrefix(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// } -// -// public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) { -// } -// -// public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) { -// } + /** + * Forwarding method to identical tracker method + * @param tracker + * @param ref + * @param curLocation + * @param requireStartHere + * @return + */ + public VariantContext getVariantContext(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final GenomeLoc curLocation, + final boolean requireStartHere ) { + return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); + } + + /** + * Forwarding method to identical tracker method + */ + public VariantContext getVariantContext(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final GenomeLoc curLocation) { + return tracker.getVariantContext(ref, variableName, curLocation); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 926158a36..40cf4cbd4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -54,7 +54,7 @@ public class RefMetaDataTracker { * * Important: The list returned by this function is guaranteed not to be null, but may be empty! */ - public List getValues(final String name, Class clazz) { + public List getValues(final String name, final Class clazz) { RODRecordList list = getTrackDataByName(name); if (list == null) @@ -73,10 +73,10 @@ public class RefMetaDataTracker { } /** - * get a singleton record, given the name and a type. This function will return the first record at the current position seen, - * and emit a logger warning if there were more than one option. + * get a singleton record, given the name and a type. This function will return the first record at the + * current position seen. The object is cast into a type clazz, or thoses an error if this isn't possible. * - * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are + * * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets * picked may change from time to time! BE WARNED! * @@ -85,7 +85,7 @@ public class RefMetaDataTracker { * @param the type to parameterize on, matching the clazz argument * @return a record of type T, or null if no record is present. */ - public T getFirstValue(final String name, Class clazz) { + public T getFirstValue(final String name, final Class clazz) { RODRecordList objects = getTrackDataByName(name); // if empty or null return null; @@ -172,6 +172,8 @@ public class RefMetaDataTracker { * Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal * system to provide access to RMDs in a structured way to the walkers. * + * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE + * * @param name the name of the track * @param rod the collection of RMD data */ @@ -180,6 +182,13 @@ public class RefMetaDataTracker { map.put(canonicalName(name), rod); } + // ------------------------------------------------------------------------------------------ + // + // + // VariantContext helpers + // + // + // ------------------------------------------------------------------------------------------ /** * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number @@ -189,8 +198,8 @@ public class RefMetaDataTracker { * @param ref reference context * @return variant context */ - public Collection getAllVariantContexts(ReferenceContext ref) { - return getAllVariantContexts(ref, null, null, false, false); + public Collection getAllVariantContexts(final ReferenceContext ref) { + return getAllVariantContexts(ref, null, false, false); } /** @@ -199,8 +208,9 @@ public class RefMetaDataTracker { * @param curLocation * @return */ - public Collection getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) { - return getAllVariantContexts(ref, null, curLocation, true, false); + public Collection getAllVariantContexts(final ReferenceContext ref, + final GenomeLoc curLocation) { + return getAllVariantContexts(ref, curLocation, true, false); } /** @@ -215,17 +225,19 @@ public class RefMetaDataTracker { * The name of each VariantContext corresponds to the ROD name. * * @param ref reference context - * @param allowedTypes allowed types * @param curLocation location * @param requireStartHere do we require the rod to start at this location? * @param takeFirstOnly do we take the first rod only? * @return variant context */ - public Collection getAllVariantContexts(ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + public Collection getAllVariantContexts(final ReferenceContext ref, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { List contexts = new ArrayList(); for ( RODRecordList rodList : getBoundRodTracks() ) { - addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly); + addVariantContexts(contexts, rodList, ref, curLocation, requireStartHere, takeFirstOnly); } return contexts; @@ -239,31 +251,30 @@ public class RefMetaDataTracker { * @param ref ReferenceContext to enable conversion to variant context * @param name name * @param curLocation location - * @param allowedTypes allowed types * @param requireStartHere do we require the rod to start at this location? * @param takeFirstOnly do we take the first rod only? * @return variant context */ -// public Collection getVariantContexts(String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); -// } - - public Collection getVariantContexts(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { - return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly); + public Collection getVariantContexts(final ReferenceContext ref, + final String name, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { + return getVariantContexts(ref, Arrays.asList(name), curLocation, requireStartHere, takeFirstOnly); } -// public Collection getVariantContexts(Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { -// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly); -// } - - public Collection getVariantContexts(ReferenceContext ref, Collection names, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + public Collection getVariantContexts(final ReferenceContext ref, + final Collection names, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { Collection contexts = new ArrayList(); for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match if ( rodList != null ) - addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); + addVariantContexts(contexts, rodList, ref, curLocation, requireStartHere, takeFirstOnly ); } return contexts; @@ -275,12 +286,14 @@ public class RefMetaDataTracker { * * @param name name * @param curLocation location - * @param allowedTypes allowed types * @param requireStartHere do we require the rod to start at this location? * @return variant context */ - public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) { - Collection contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false ); + public VariantContext getVariantContext(final ReferenceContext ref, + final String name, + final GenomeLoc curLocation, + final boolean requireStartHere ) { + Collection contexts = getVariantContexts(ref, name, curLocation, requireStartHere, false ); if ( contexts.size() > 1 ) throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation); @@ -299,24 +312,27 @@ public class RefMetaDataTracker { * @param curLocation * @return */ - public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) { - return getVariantContext(ref, name, null, curLocation, true); + public VariantContext getVariantContext(final ReferenceContext ref, + final String name, + final GenomeLoc curLocation) { + return getVariantContext(ref, name, curLocation, true); } - private void addVariantContexts(Collection contexts, RODRecordList rodList, ReferenceContext ref, EnumSet allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) { + private void addVariantContexts(final Collection contexts, + final RODRecordList rodList, + final ReferenceContext ref, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { // ok, we might actually be able to turn this record in a variant context - VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); + final VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted continue; - // now, let's decide if we want to keep it - boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType()); - boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart(); - - if ( goodType && goodPos ) { // ok, we are going to keep this thing + if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing contexts.add(vc); if ( takeFirstOnly ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index acbeee3b2..207c4118d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -202,7 +202,7 @@ public class VariantAnnotator extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); if ( VCs.size() == 0 ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 3cf96d443..73249d343 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -203,7 +203,7 @@ public class VariantAnnotatorEngine { infoAnnotations.put(VariantContext.ID_KEY, rsID); } else { boolean overlapsComp = false; - for ( VariantContext comp : tracker.getVariantContexts(ref, dbSet.getKey(), null, ref.getLocus(), false, false) ) { + for ( VariantContext comp : tracker.getVariantContexts(ref, dbSet.getKey(), ref.getLocus(), false, false) ) { if ( !comp.isFiltered() ) { overlapsComp = true; break; @@ -216,7 +216,7 @@ public class VariantAnnotatorEngine { private void annotateExpressions(RefMetaDataTracker tracker, ReferenceContext ref, Map infoAnnotations) { for ( VAExpression expression : requestedExpressions ) { - Collection VCs = tracker.getVariantContexts(ref, expression.bindingName, null, ref.getLocus(), false, true); + Collection VCs = tracker.getVariantContexts(ref, expression.bindingName, ref.getLocus(), false, true); if ( VCs.size() == 0 ) continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index b42310780..82d2af283 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -244,7 +244,7 @@ public class GenomicAnnotator extends RodWalker implements Tre return 0; Set results = new LinkedHashSet(); - for (VariantContext vc : tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false)) { + for (VariantContext vc : tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false)) { if ( (vc.isFiltered() && IGNORE_FILTERED_SITES) || (vc.isVariant() && !vc.isBiallelic()) ) { results.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 0720e5a16..5499c99b0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -119,9 +119,9 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; GenomeLoc loc = context.getLocation(); - VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, null, loc, true); + VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, loc, true); - VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, null, loc, true); + VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, loc, true); if ( vc_input == null ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 3eed12992..b20b44cd6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -121,8 +121,8 @@ public class ProduceBeagleInputWalker extends RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext variant_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, true); - VariantContext validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME,null,loc, true); + VariantContext variant_eval = tracker.getVariantContext(ref, ROD_NAME, loc, true); + VariantContext validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME, loc, true); if ( goodSite(variant_eval,validation_eval) ) { if ( useValidation(validation_eval, ref) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index f6cd1d636..41b35321e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -102,7 +102,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext vc = tracker.getVariantContext(ref, ROD_NAME, null, loc, true); + VariantContext vc = tracker.getVariantContext(ref, ROD_NAME, loc, true); if ( ProduceBeagleInputWalker.canBeOutputToBeagle(vc) ) { // do we want to hold back this site? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index eb62190fb..2e96fc172 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -149,7 +149,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); // is there a SNP mask present? boolean hasMask = tracker.getValues("mask").size() > 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 60ea601d5..c76fea34f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -293,6 +293,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return aList; } + + private final static EnumSet allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED); + public Allele getLikelihoods(RefMetaDataTracker tracker, ReferenceContext ref, Map contexts, @@ -318,11 +321,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap.clear(); if (getAlleleListFromVCF) { - EnumSet allowableTypes = EnumSet.of(VariantContext.Type.INDEL); - allowableTypes.add(VariantContext.Type.MIXED); - for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", - allowableTypes, ref.getLocus(), false, false) ) { - if( vc_input != null && ref.getLocus().getStart() == vc_input.getStart()) { + for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", ref.getLocus(), false, false) ) { + if( vc_input != null && + allowableTypes.contains(vc_input.getType()) && + ref.getLocus().getStart() == vc_input.getStart()) { vc = vc_input; break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 3e3cd128b..30a86249d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -63,7 +63,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC VariantContext vc = null; // search for usable record - for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", null, ref.getLocus(), true, false) ) { + for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", ref.getLocus(), true, false) ) { if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) { if ( vc == null ) { vc = vc_input; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 68d8f9b54..6e03088e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -96,7 +96,7 @@ public class UGCallVariants extends RodWalker { List VCs = new ArrayList(); for ( String name : trackNames ) { - Collection vc = tracker.getVariantContexts(ref, name, null, context.getLocation(), true, true); + Collection vc = tracker.getVariantContexts(ref, name, context.getLocation(), true, true); VCs.addAll(vc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java index 22672b7a9..263dffb6c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/AnnotateMNPsWalker.java @@ -155,7 +155,7 @@ public class AnnotateMNPsWalker extends RodWalker { boolean requireStartHere = false; // see EVERY site of the MNP boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) { + for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, context.getLocation(), requireStartHere, takeFirstOnly)) { GenomeLoc vcLoc = VariantContextUtils.getLocation(locParser, vc); boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart(); boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java index 298d8d6c8..a4b49b8ff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -44,8 +44,8 @@ public class MergeAndMatchHaplotypes extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection pbts = tracker.getVariantContexts(ref, "pbt", null, ref.getLocus(), true, true); - Collection rbps = tracker.getVariantContexts(ref, "rbp", null, ref.getLocus(), true, true); + Collection pbts = tracker.getVariantContexts(ref, "pbt", ref.getLocus(), true, true); + Collection rbps = tracker.getVariantContexts(ref, "rbp", ref.getLocus(), true, true); VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index 5bd438605..d36da9835 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -103,7 +103,7 @@ public class MergeMNPsWalker extends RodWalker { boolean requireStartHere = true; // only see each VariantContext once boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, null, context.getLocation(), requireStartHere, takeFirstOnly)) + for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, context.getLocation(), requireStartHere, takeFirstOnly)) writeVCF(vc); return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index be15d4541..73d7fa529 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -140,7 +140,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection vcs = tracker.getVariantContexts(ref, ROD_NAME, null, context.getLocation(), true, true); + Collection vcs = tracker.getVariantContexts(ref, ROD_NAME, context.getLocation(), true, true); for (VariantContext vc : vcs) { Map genotypeMap = vc.getGenotypes(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 9702fd18c..f96cec1b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -209,7 +209,7 @@ public class ReadBackedPhasingWalker extends RodWalker knownComps = tracker.getVariantContexts(ref, knownName, allowableTypes, ref.getLocus(), true, true); - - isNovel = knownComps.size() == 0; + Collection knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true); + for ( VariantContext c : knownComps ) + if ( allowableTypes.contains(c.getType()) ) { + isNovel = false; + break; + } break; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 0a915db37..7beb94593 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -270,30 +270,7 @@ public class VariantEvalUtils { Set compNames, Set evalNames, boolean dynamicSelectTypes ) { - if ( dynamicSelectTypes ) { // todo -- this code is really conceptually broken - EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); - - if (tracker != null) { - Collection evalvcs = tracker.getVariantContexts(ref, evalNames, null, ref.getLocus(), true, false); - - for (VariantContext vc : evalvcs) { - allowableTypes.add(vc.getType()); - } - - if (allowableTypes.size() == 1) { - // We didn't find any variation in the eval track, so now let's look at the comp track for allowable types - Collection compvcs = tracker.getVariantContexts(ref, compNames, null, ref.getLocus(), true, false); - - for (VariantContext vc : compvcs) { - allowableTypes.add(vc.getType()); - } - } - } - - return allowableTypes; - } else { - return EnumSet.allOf(VariantContext.Type.class); - } + return EnumSet.allOf(VariantContext.Type.class); } /** @@ -359,7 +336,7 @@ public class VariantEvalUtils { for (String trackName : trackNames) { HashMap vcs = new HashMap(); - Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, allowableTypes, ref.getLocus(), true, true); + Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, ref.getLocus(), true, true); VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null; // First, filter the VariantContext to represent only the samples for evaluation diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index b195fd35f..5fb738944 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -168,7 +168,7 @@ public class ApplyRecalibration extends RodWalker { return 1; } - for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) { + for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, context.getLocation(), true, false) ) { if( vc != null ) { if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { String filterString = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 67d54a408..c76efc2dd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -258,7 +258,7 @@ public class VariantDataManager { datum.consensusCount = 0; for( final TrainingSet trainingSet : trainingSets ) { - for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) { + for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, context.getLocation(), false, false ) ) { if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 76c888640..7e1d931c9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -163,7 +163,7 @@ public class VariantRecalibrator extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(ref, null, context.getLocation(), true, false); + Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation(), true, false); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index b45ee1b67..97cb9a6e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -85,7 +85,7 @@ public class FilterLiftedVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); for ( VariantContext vc : VCs ) filterAndWrite(ref.getBases(), vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 2ebd183f4..566333d3f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -68,7 +68,7 @@ public class LeftAlignVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); int changedSites = 0; for ( VariantContext vc : VCs ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 4f05c8aac..061c3b256 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -143,7 +143,7 @@ public class LiftoverVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); for ( VariantContext vc : VCs ) convertAndWrite(vc, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index f0756d884..99e12c836 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -97,7 +97,7 @@ public class RandomlySplitVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false); + Collection vcs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); for ( VariantContext vc : vcs ) { int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); if ( random < iFraction ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index e1a3659b8..0efed393a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -317,7 +317,7 @@ public class SelectVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getVariantContexts(ref, variantRodName, null, context.getLocation(), true, false); + Collection vcs = tracker.getVariantContexts(ref, variantRodName, context.getLocation(), true, false); if ( vcs == null || vcs.size() == 0) { return 0; @@ -345,12 +345,12 @@ public class SelectVariants extends RodWalker { break; } if (DISCORDANCE_ONLY) { - Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, null, context.getLocation(), true, false); + Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, context.getLocation(), true, false); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { - Collection compVCs = tracker.getVariantContexts(ref, concordanceRodName, null, context.getLocation(), true, false); + Collection compVCs = tracker.getVariantContexts(ref, concordanceRodName, context.getLocation(), true, false); if (!isConcordant(vc, compVCs)) return 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 5e779097a..756a00731 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -86,7 +86,7 @@ public class ValidateVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", null, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); for ( VariantContext vc : VCs ) validate(vc, tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 4f5a5652f..8f0fdd907 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -90,21 +90,23 @@ public class VariantsToVCF extends RodWalker { Collection contexts = getVariantContexts(tracker, ref); for ( VariantContext vc : contexts ) { - Map attrs = new HashMap(vc.getAttributes()); - if ( rsID != null && !vc.hasID() ) { - attrs.put(VariantContext.ID_KEY, rsID); - vc = VariantContext.modifyAttributes(vc, attrs); - } + if ( ALLOWED_VARIANT_CONTEXT_TYPES.contains(vc.getType()) ) { + Map attrs = new HashMap(vc.getAttributes()); + if ( rsID != null && !vc.hasID() ) { + attrs.put(VariantContext.ID_KEY, rsID); + vc = VariantContext.modifyAttributes(vc, attrs); + } - // set the appropriate sample name if necessary - if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(INPUT_ROD_NAME) ) { - Genotype g = Genotype.modifyName(vc.getGenotype(INPUT_ROD_NAME), sampleName); - Map genotypes = new HashMap(); - genotypes.put(sampleName, g); - vc = VariantContext.modifyGenotypes(vc, genotypes); - } + // set the appropriate sample name if necessary + if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(INPUT_ROD_NAME) ) { + Genotype g = Genotype.modifyName(vc.getGenotype(INPUT_ROD_NAME), sampleName); + Map genotypes = new HashMap(); + genotypes.put(sampleName, g); + vc = VariantContext.modifyGenotypes(vc, genotypes); + } - writeRecord(vc, tracker, ref.getBase()); + writeRecord(vc, tracker, ref.getBase()); + } } return 1; @@ -160,7 +162,7 @@ public class VariantsToVCF extends RodWalker { } // for everything else, we can just convert to VariantContext - return tracker.getVariantContexts(ref, INPUT_ROD_NAME, ALLOWED_VARIANT_CONTEXT_TYPES, ref.getLocus(), true, false); + return tracker.getVariantContexts(ref, INPUT_ROD_NAME, ref.getLocus(), true, false); } private DbSNPFeature getDbsnpFeature(String rsID) { From 1afc49a2973b35eab207e6d7e13707975742dace Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 28 Jul 2011 13:55:58 -0400 Subject: [PATCH 053/186] There are some really 'interesting' (but apparently valid) records in the Mus musculus dbSNP file. Generalized the handling of complex cases in the dbSNP adaptor to handle it all. I just grabbed the actual Mus musculus dbSNP file as a test, ran it whole genome, and confirmed that we finally produce a valid VCF on it. Should be the last commit needed on this adaptor. --- .../sting/gatk/refdata/VariantContextAdaptors.java | 11 +++++++---- .../variantcontext/VariantContextIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index dedd2f26e..ba9a10d8b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -112,26 +112,29 @@ public class VariantContextAdaptors { alleles.add(refAllele); // add all of the alt alleles + boolean sawNullAllele = false; for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { if ( ! Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding dbsnp record %s%n", dbsnp); return null; } - alleles.add(Allele.create(alt, false)); + Allele altAllele = Allele.create(alt, false); + alleles.add(altAllele); + if ( altAllele.isNull() ) + sawNullAllele = true; } Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); - boolean vcIsDeletion = DbSNPHelper.isDeletion(dbsnp) || DbSNPHelper.isComplexIndel(dbsnp); - if ( vcIsDeletion ) { + if ( sawNullAllele ) { int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); } Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (vcIsDeletion ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); return vc; } else return null; // can't handle anything else diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index ced2bf00b..6ed00f0ea 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("2532234d2c934a5e14849655dd7b5f4f")); + Arrays.asList("045a5b02c86aeb9301dc0b724da0c8f7")); executeTest("testLargeScaleConversion", spec); } } From 7c5c656b46a56cd7d968c830cb0d412c74111322 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 28 Jul 2011 14:19:27 -0400 Subject: [PATCH 054/186] Uncovered fundamental accounting bug in VariantEval. Will be fixed by dev. team Problem is that Novelty sees multiple records at a site (SNP, INDEL) to calculate whether a site is novel, but VariantEvalWalker makes an arbitrary decision which to use for analysis and CompOverlap may not see a comp record of the same type as eval. So you get lines where the stratification is known but there are 10 novel sites! --- .../commandline/ArgumentTypeDescriptor.java | 5 +- .../sting/commandline/RodBinding.java | 22 +-- .../commandline/VariantContextRodBinding.java | 164 +++++++++--------- .../varianteval/evaluators/CompOverlap.java | 4 +- .../varianteval/stratifications/Novelty.java | 43 ++--- .../VariantEvalIntegrationTest.java | 10 +- 6 files changed, 114 insertions(+), 134 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 9b751cc3a..2e5cb4d62 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -299,8 +299,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { - Constructor ctor = type.getConstructor(String.class, String.class, ParsingEngine.class); - RodBinding result = (RodBinding)ctor.newInstance(source.field.getName(), value, parsingEngine); + // TODO: determine type of internal value via Parameter + Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class); + RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine); Tags tags = getArgumentTags(matches); parsingEngine.addTags(result,tags); return result; diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index ec2117127..d7d086824 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.commandline; +import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; @@ -34,12 +35,14 @@ import java.util.List; * * There is no constraint on the type of the ROD bound. */ -public class RodBinding { +public class RodBinding { final String variableName; final String source; final ParsingEngine parser; + final Class type; - protected RodBinding(final String variableName, final String source, final ParsingEngine parser) { + protected RodBinding(Class type, final String variableName, final String source, final ParsingEngine parser) { + this.type = type; this.variableName = variableName; this.source = source; this.parser = parser; @@ -53,16 +56,16 @@ public class RodBinding { return source; } - public List getValues(final RefMetaDataTracker tracker) { - return tracker.getValues(variableName); + public List getValues(final RefMetaDataTracker tracker) { + return tracker.getValues(variableName, type); } - public List getValues(final RefMetaDataTracker tracker, final Class clazz) { - return tracker.getValues(variableName, clazz); - } +// public List getValues(final RefMetaDataTracker tracker, final Class clazz) { +// return tracker.getValues(variableName, clazz); +// } - public T getFirstValue(final RefMetaDataTracker tracker, final Class clazz) { - return tracker.getFirstValue(variableName, clazz); + public T getFirstValue(final RefMetaDataTracker tracker) { + return tracker.getFirstValue(variableName, type); } public boolean hasValues(final RefMetaDataTracker tracker) { @@ -80,5 +83,4 @@ public class RodBinding { public String toString() { return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource()); } - } diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java index f5e29986e..a01149cb0 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -1,82 +1,82 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.commandline; - -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Collection; - -/** - * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts - */ -public class VariantContextRodBinding extends RodBinding { - /** - * Create a new RodBinding specialized to provide VariantContexts. - * @param variableName the name of the field in the walker that we will bind the ROD track too - * @param sourceFile the data source from which we will read the VCs - * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type - */ - protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { - super(variableName, sourceFile, parser); - } - - /** - * Forwarding method to identical tracker method - */ - public Collection getVariantContexts(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { - return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); - } - - /** - * Forwarding method to identical tracker method - * @param tracker - * @param ref - * @param curLocation - * @param requireStartHere - * @return - */ - public VariantContext getVariantContext(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final GenomeLoc curLocation, - final boolean requireStartHere ) { - return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); - } - - /** - * Forwarding method to identical tracker method - */ - public VariantContext getVariantContext(final RefMetaDataTracker tracker, - final ReferenceContext ref, - final GenomeLoc curLocation) { - return tracker.getVariantContext(ref, variableName, curLocation); - } -} +///* +// * Copyright (c) 2011, The Broad Institute +// * +// * Permission is hereby granted, free of charge, to any person +// * obtaining a copy of this software and associated documentation +// * files (the "Software"), to deal in the Software without +// * restriction, including without limitation the rights to use, +// * copy, modify, merge, publish, distribute, sublicense, and/or sell +// * copies of the Software, and to permit persons to whom the +// * Software is furnished to do so, subject to the following +// * conditions: +// * +// * The above copyright notice and this permission notice shall be +// * included in all copies or substantial portions of the Software. +// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// * OTHER DEALINGS IN THE SOFTWARE. +// */ +// +//package org.broadinstitute.sting.commandline; +// +//import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +//import org.broadinstitute.sting.utils.GenomeLoc; +//import org.broadinstitute.sting.utils.variantcontext.VariantContext; +// +//import java.util.Collection; +// +///** +// * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts +// */ +//public class VariantContextRodBinding extends RodBinding { +// /** +// * Create a new RodBinding specialized to provide VariantContexts. +// * @param variableName the name of the field in the walker that we will bind the ROD track too +// * @param sourceFile the data source from which we will read the VCs +// * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type +// */ +// protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { +// super(variableName, sourceFile, parser); +// } +// +// /** +// * Forwarding method to identical tracker method +// */ +// public Collection getVariantContexts(final RefMetaDataTracker tracker, +// final ReferenceContext ref, +// final GenomeLoc curLocation, +// final boolean requireStartHere, +// final boolean takeFirstOnly ) { +// return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); +// } +// +// /** +// * Forwarding method to identical tracker method +// * @param tracker +// * @param ref +// * @param curLocation +// * @param requireStartHere +// * @return +// */ +// public VariantContext getVariantContext(final RefMetaDataTracker tracker, +// final ReferenceContext ref, +// final GenomeLoc curLocation, +// final boolean requireStartHere ) { +// return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); +// } +// +// /** +// * Forwarding method to identical tracker method +// */ +// public VariantContext getVariantContext(final RefMetaDataTracker tracker, +// final ReferenceContext ref, +// final GenomeLoc curLocation) { +// return tracker.getVariantContext(ref, variableName, curLocation); +// } +//} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 255a54737..2ea64c49c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -76,9 +76,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { boolean evalIsGood = eval != null && eval.isVariant(); - boolean expectingIndels = eval != null && eval.isIndel(); - - boolean compIsGood = expectingIndels ? comp != null && comp.isNotFiltered() && comp.isIndel() : comp != null && comp.isNotFiltered() && comp.isSNP() ; + boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType()); if (compIsGood) nCompVariants++; // count the number of comp events if (evalIsGood) nEvalVariants++; // count the number of eval events diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index c1611649f..5bdec837e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -5,24 +5,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatchExp; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.ArrayList; -import java.util.Collection; -import java.util.EnumSet; -import java.util.Set; +import java.util.*; public class Novelty extends VariantStratifier implements StandardStratification { // needs the variant contexts and known names private Set knownNames; - private ArrayList states; + final private ArrayList states = new ArrayList(Arrays.asList("all", "known", "novel")); + @Override public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames, Set contigNames) { this.knownNames = knownNames; - - states = new ArrayList(); - states.add("all"); - states.add("known"); - states.add("novel"); } public ArrayList getAllStates() { @@ -30,32 +23,18 @@ public class Novelty extends VariantStratifier implements StandardStratification } public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { - boolean isNovel = true; - - if (tracker != null) { - for (String knownName : knownNames) { - if (tracker.hasValues(knownName)) { - EnumSet allowableTypes = EnumSet.of(VariantContext.Type.NO_VARIATION); - if (eval != null) { - allowableTypes.add(eval.getType()); + if (tracker != null && eval != null) { + for (final String knownName : knownNames) { + final Collection knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false); + for ( final VariantContext c : knownComps ) { + // loop over sites, looking for something that matches the type eval + if ( eval.getType() == c.getType() ) { + return new ArrayList(Arrays.asList("all", "known")); } - - Collection knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, true); - for ( VariantContext c : knownComps ) - if ( allowableTypes.contains(c.getType()) ) { - isNovel = false; - break; - } - - break; } } } - ArrayList relevantStates = new ArrayList(); - relevantStates.add("all"); - relevantStates.add(isNovel ? "novel" : "known"); - - return relevantStates; + return new ArrayList(Arrays.asList("all", "novel")); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 23c606ad0..38663ad42 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -249,7 +249,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec")); + 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); executeTestParallel("testSelect1", spec); } } @@ -299,7 +299,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -D " + dbsnp + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -313,7 +313,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -330,13 +330,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "454a1750fd36525f24172b21af5f49de"; + String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( From 39b4e76fde5e13453d3a66d126ebdfcd29bdaed1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 28 Jul 2011 17:48:28 -0400 Subject: [PATCH 055/186] Continuing refactoring of RefMetaDataTracker. On the path towards converging getVariantContext() and getValues() in tracker so that we can have a single approach to get values from RODs with the new RodBinding() types --- .../commandline/VariantContextRodBinding.java | 160 +++++++------ .../ManagingReferenceOrderedView.java | 5 +- .../providers/ReferenceOrderedView.java | 3 +- .../datasources/providers/RodLocusView.java | 41 ++-- .../gatk/refdata/RefMetaDataTracker.java | 213 +++++++++++------- .../sting/gatk/traversals/TraverseLoci.java | 6 +- .../walkers/annotator/VariantAnnotator.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 4 +- .../genomicannotator/GenomicAnnotator.java | 2 +- .../beagle/BeagleOutputToVCFWalker.java | 4 +- .../beagle/ProduceBeagleInputWalker.java | 4 +- .../VariantsToBeagleUnphasedWalker.java | 2 +- .../fasta/FastaAlternateReferenceWalker.java | 2 +- .../filters/VariantFiltrationWalker.java | 2 +- ...elGenotypeLikelihoodsCalculationModel.java | 2 +- ...NPGenotypeLikelihoodsCalculationModel.java | 2 +- .../walkers/genotyper/UGCallVariants.java | 2 +- .../indels/RealignerTargetCreator.java | 2 +- .../walkers/phasing/AnnotateMNPsWalker.java | 11 +- .../phasing/MergeAndMatchHaplotypes.java | 4 +- .../gatk/walkers/phasing/MergeMNPsWalker.java | 11 +- ...ergeSegregatingAlternateAllelesWalker.java | 11 +- .../walkers/phasing/PhaseByTransmission.java | 2 +- .../phasing/ReadBackedPhasingWalker.java | 11 +- .../walkers/qc/RodSystemValidationWalker.java | 2 +- .../validation/ValidationAmplicons.java | 4 +- .../varianteval/stratifications/Novelty.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 2 +- .../ApplyRecalibration.java | 2 +- .../VariantDataManager.java | 2 +- .../VariantRecalibrator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 3 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../variantutils/RandomlySplitVariants.java | 2 +- .../walkers/variantutils/SelectVariants.java | 15 +- .../variantutils/ValidateVariants.java | 2 +- .../VariantValidationAssessor.java | 2 +- .../walkers/variantutils/VariantsToTable.java | 2 +- .../VariantsToTableNewRodStyle.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 2 +- .../ReferenceOrderedViewUnitTest.java | 6 +- 43 files changed, 291 insertions(+), 275 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java index a01149cb0..a1bc05ef6 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java @@ -1,82 +1,78 @@ -///* -// * Copyright (c) 2011, The Broad Institute -// * -// * Permission is hereby granted, free of charge, to any person -// * obtaining a copy of this software and associated documentation -// * files (the "Software"), to deal in the Software without -// * restriction, including without limitation the rights to use, -// * copy, modify, merge, publish, distribute, sublicense, and/or sell -// * copies of the Software, and to permit persons to whom the -// * Software is furnished to do so, subject to the following -// * conditions: -// * -// * The above copyright notice and this permission notice shall be -// * included in all copies or substantial portions of the Software. -// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// * OTHER DEALINGS IN THE SOFTWARE. -// */ -// -//package org.broadinstitute.sting.commandline; -// -//import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -//import org.broadinstitute.sting.utils.GenomeLoc; -//import org.broadinstitute.sting.utils.variantcontext.VariantContext; -// -//import java.util.Collection; -// -///** -// * A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts -// */ -//public class VariantContextRodBinding extends RodBinding { -// /** -// * Create a new RodBinding specialized to provide VariantContexts. -// * @param variableName the name of the field in the walker that we will bind the ROD track too -// * @param sourceFile the data source from which we will read the VCs -// * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type -// */ -// protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { -// super(variableName, sourceFile, parser); -// } -// -// /** -// * Forwarding method to identical tracker method -// */ -// public Collection getVariantContexts(final RefMetaDataTracker tracker, -// final ReferenceContext ref, -// final GenomeLoc curLocation, -// final boolean requireStartHere, -// final boolean takeFirstOnly ) { -// return tracker.getVariantContexts(ref, variableName, curLocation, requireStartHere, takeFirstOnly); -// } -// -// /** -// * Forwarding method to identical tracker method -// * @param tracker -// * @param ref -// * @param curLocation -// * @param requireStartHere -// * @return -// */ -// public VariantContext getVariantContext(final RefMetaDataTracker tracker, -// final ReferenceContext ref, -// final GenomeLoc curLocation, -// final boolean requireStartHere ) { -// return tracker.getVariantContext(ref, variableName, curLocation, requireStartHere); -// } -// -// /** -// * Forwarding method to identical tracker method -// */ -// public VariantContext getVariantContext(final RefMetaDataTracker tracker, -// final ReferenceContext ref, -// final GenomeLoc curLocation) { -// return tracker.getVariantContext(ref, variableName, curLocation); -// } -//} +/* +* Copyright (c) 2011, The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +* OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.Collection; + +/** +* A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts +*/ +public class VariantContextRodBinding extends RodBinding { + /** + * Create a new RodBinding specialized to provide VariantContexts. + * @param variableName the name of the field in the walker that we will bind the ROD track too + * @param sourceFile the data source from which we will read the VCs + * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type + */ + protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { + super(VariantContext.class, variableName, sourceFile, parser); + } + + /** + * Forwarding method to identical tracker method + */ + public Collection getVariantContexts(final RefMetaDataTracker tracker, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { + return tracker.getVariantContexts(variableName, curLocation, requireStartHere, takeFirstOnly); + } + + /** + * Forwarding method to identical tracker method + * @param tracker + * @param curLocation + * @param requireStartHere + * @return + */ + public VariantContext getVariantContext(final RefMetaDataTracker tracker, + final GenomeLoc curLocation, + final boolean requireStartHere ) { + return tracker.getVariantContext(variableName, curLocation, requireStartHere); + } + + /** + * Forwarding method to identical tracker method + */ + public VariantContext getVariantContext(final RefMetaDataTracker tracker, + final GenomeLoc curLocation) { + return tracker.getVariantContext(variableName, curLocation); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index 223659a46..f75f358e1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; @@ -49,8 +50,8 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { * @param loc Locus at which to track. * @return A tracker containing information about this locus. */ - public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { - RefMetaDataTracker tracks = new RefMetaDataTracker(states.size()); + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { + RefMetaDataTracker tracks = new RefMetaDataTracker(states.size(), referenceContext); for ( ReferenceOrderedDataState state: states ) tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) ); return tracks; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java index 2d46a85ac..939cbfe35 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedView.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; public interface ReferenceOrderedView extends View { - RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ); + RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 50c10c26e..3db5bd19a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; @@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { */ private RODMergingIterator rodQueue = null; - RefMetaDataTracker tracker = null; + Collection allTracksHere; + GenomeLoc lastLoc = null; RODRecordList interval = null; @@ -98,8 +100,17 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { //throw new StingException("RodLocusView currently disabled"); } - public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { - return tracker; + public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { + RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size(), referenceContext); + for ( RODRecordList track : allTracksHere ) { + if ( ! t.hasValues(track.getName()) ) + t.bind(track.getName(), track); + } + + // special case the interval again -- add it into the ROD + if ( interval != null ) { t.bind(interval.getName(), interval); } + + return t; } public boolean hasNext() { @@ -122,10 +133,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n"); - // Update the tracker here for use - Collection allTracksHere = getSpanningTracks(datum); - tracker = createTracker(allTracksHere); - + allTracksHere = getSpanningTracks(datum); GenomeLoc rodSite = datum.getLocation(); GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart()); @@ -137,19 +145,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases); } - private RefMetaDataTracker createTracker( Collection allTracksHere ) { - RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size()); - for ( RODRecordList track : allTracksHere ) { - if ( ! t.hasValues(track.getName()) ) - t.bind(track.getName(), track); - } - - // special case the interval again -- add it into the ROD - if ( interval != null ) { t.bind(interval.getName(), interval); } - - return t; - } - private Collection getSpanningTracks(RODRecordList marker) { return rodQueue.allElementsLTE(marker); } @@ -197,10 +192,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { return getSkippedBases(getLocOneBeyondShard()); } - public RefMetaDataTracker getTracker() { - return tracker; - } - /** * Closes the current view. */ @@ -209,6 +200,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { state.dataSource.close( state.iterator ); rodQueue = null; - tracker = null; + allTracksHere = null; } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 40cf4cbd4..808ad5430 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -4,6 +4,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -18,7 +19,7 @@ import java.util.*; * The standard interaction model is: * * Traversal system arrives at a site, which has a bunch of RMDs covering it -Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs + Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs * Traversal passes tracker to the walker * walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was * bound at this site. @@ -29,15 +30,83 @@ Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs */ public class RefMetaDataTracker { final Map map; + final ReferenceContext ref; protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); - public RefMetaDataTracker(int nBindings) { + public RefMetaDataTracker(int nBindings, ReferenceContext ref) { + this.ref = ref; if ( nBindings == 0 ) map = Collections.emptyMap(); else map = new HashMap(nBindings); } + + // ------------------------------------------------------------------------------------------ + // + // + // Special ENGINE interaction functions + // + // + // ------------------------------------------------------------------------------------------ + + /** + * Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal + * system to provide access to RMDs in a structured way to the walkers. + * + * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE + * + * @param name the name of the track + * @param rod the collection of RMD data + */ + public void bind(final String name, RODRecordList rod) { + //logger.debug(String.format("Binding %s to %s", name, rod)); + map.put(canonicalName(name), maybeConvertToVariantContext(rod)); + } + + /** + * A private converter that transforms a RODRecordList of objects of type X into + * a list of VariantContexts, if possible. + * + * TODO: should be removed when Features like dbsnp and hapmap produce VCs directly + * + * @param bindings + * @return + */ + private final RODRecordList maybeConvertToVariantContext(RODRecordList bindings) { + List values = new ArrayList(bindings.size()); + + for ( GATKFeature rec : bindings ) { + if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { + final VariantContext vc = VariantContextAdaptors.toVariantContext(bindings.getName(), rec.getUnderlyingObject(), ref); + if ( vc != null ) // it's possible that the conversion failed, but we continue along anyway + values.add(new GATKFeature.TribbleGATKFeature(ref.getGenomeLocParser(), vc, rec.getName())); + } + } + + return new RODRecordListImpl(bindings.getName(), values, bindings.getLocation()); + } + +// /** +// * Temporary setting for putting a reference context into the system. +// * +// * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE +// * +// * @param ref +// */ +// public void setRef(final ReferenceContext ref) { +// this.ref = ref; +// } + + + // ------------------------------------------------------------------------------------------ + // + // + // Generic accessors + // + // + // ------------------------------------------------------------------------------------------ + /** * No-assumption version of getValues(name, class). Returns Objects. */ @@ -60,18 +129,11 @@ public class RefMetaDataTracker { if (list == null) return Collections.emptyList(); else { - List objects = new ArrayList(); - for (GATKFeature feature : list) { - final Object obj = feature.getUnderlyingObject(); - if (!(clazz.isAssignableFrom(obj.getClass()))) - throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() - + " it's of type " + obj.getClass()); - objects.add((T)obj); - } - return objects; + return addValues(name, clazz, new ArrayList(), list, list.getLocation(), false, false); } } + /** * get a singleton record, given the name and a type. This function will return the first record at the * current position seen. The object is cast into a type clazz, or thoses an error if this isn't possible. @@ -79,7 +141,7 @@ public class RefMetaDataTracker { * * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets * picked may change from time to time! BE WARNED! - * + * * @param name the name of the track * @param clazz the underlying type to return * @param the type to parameterize on, matching the clazz argument @@ -116,7 +178,7 @@ public class RefMetaDataTracker { * * @return collection of all rods */ - public Collection getAllValuesAsGATKFeatures() { + public List getAllValuesAsGATKFeatures() { List l = new ArrayList(); for ( RODRecordList rl : map.values() ) { if ( rl != null ) @@ -125,7 +187,7 @@ public class RefMetaDataTracker { return l; } - /** + /** * get all the GATK features associated with a specific track name * @param name the name of the track we're looking for * @return a list of GATKFeatures for the target rmd @@ -141,9 +203,9 @@ public class RefMetaDataTracker { * Get all of the RMD tracks at the current site. Each track is returned as a single compound * object (RODRecordList) that may contain multiple RMD records associated with the current site. * - * @return collection of all tracks + * @return List of all tracks */ - public Collection getBoundRodTracks() { + public List getBoundRodTracks() { LinkedList bound = new LinkedList(); for ( RODRecordList value : map.values() ) { @@ -167,21 +229,6 @@ public class RefMetaDataTracker { return n; } - - /** - * Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal - * system to provide access to RMDs in a structured way to the walkers. - * - * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE - * - * @param name the name of the track - * @param rod the collection of RMD data - */ - public void bind(final String name, RODRecordList rod) { - //logger.debug(String.format("Binding %s to %s", name, rod)); - map.put(canonicalName(name), rod); - } - // ------------------------------------------------------------------------------------------ // // @@ -195,22 +242,20 @@ public class RefMetaDataTracker { * of entries per ROD. * The name of each VariantContext corresponds to the ROD name. * - * @param ref reference context * @return variant context */ - public Collection getAllVariantContexts(final ReferenceContext ref) { - return getAllVariantContexts(ref, null, false, false); + public List getAllVariantContexts() { + return getAllVariantContexts(null, false, false); } /** * Returns all of the variant contexts that start at the current location - * @param ref + * * @param curLocation * @return */ - public Collection getAllVariantContexts(final ReferenceContext ref, - final GenomeLoc curLocation) { - return getAllVariantContexts(ref, curLocation, true, false); + public List getAllVariantContexts(final GenomeLoc curLocation) { + return getAllVariantContexts(curLocation, true, false); } /** @@ -224,20 +269,19 @@ public class RefMetaDataTracker { * * The name of each VariantContext corresponds to the ROD name. * - * @param ref reference context + * * @param curLocation location * @param requireStartHere do we require the rod to start at this location? * @param takeFirstOnly do we take the first rod only? * @return variant context */ - public Collection getAllVariantContexts(final ReferenceContext ref, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { + public List getAllVariantContexts(final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly) { List contexts = new ArrayList(); for ( RODRecordList rodList : getBoundRodTracks() ) { - addVariantContexts(contexts, rodList, ref, curLocation, requireStartHere, takeFirstOnly); + addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly); } return contexts; @@ -248,33 +292,31 @@ public class RefMetaDataTracker { * * see getVariantContexts for more information. * - * @param ref ReferenceContext to enable conversion to variant context + * * @param name name * @param curLocation location * @param requireStartHere do we require the rod to start at this location? * @param takeFirstOnly do we take the first rod only? * @return variant context */ - public Collection getVariantContexts(final ReferenceContext ref, - final String name, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { - return getVariantContexts(ref, Arrays.asList(name), curLocation, requireStartHere, takeFirstOnly); + public List getVariantContexts(final String name, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly) { + return getVariantContexts(Arrays.asList(name), curLocation, requireStartHere, takeFirstOnly); } - public Collection getVariantContexts(final ReferenceContext ref, - final Collection names, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { - Collection contexts = new ArrayList(); + public List getVariantContexts(final Collection names, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly) { + List contexts = new ArrayList(); for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match if ( rodList != null ) - addVariantContexts(contexts, rodList, ref, curLocation, requireStartHere, takeFirstOnly ); + addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly ); } return contexts; @@ -284,16 +326,16 @@ public class RefMetaDataTracker { * Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not. * see getVariantContexts for more information. * + * * @param name name * @param curLocation location * @param requireStartHere do we require the rod to start at this location? * @return variant context */ - public VariantContext getVariantContext(final ReferenceContext ref, - final String name, + public VariantContext getVariantContext(final String name, final GenomeLoc curLocation, - final boolean requireStartHere ) { - Collection contexts = getVariantContexts(ref, name, curLocation, requireStartHere, false ); + final boolean requireStartHere) { + List contexts = getVariantContexts(name, curLocation, requireStartHere, false ); if ( contexts.size() > 1 ) throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation); @@ -306,46 +348,53 @@ public class RefMetaDataTracker { /** * Very simple accessor that gets the first (and only!) VC associated with name at the current location, or * null if there's no binding here. - * - * @param ref + * + * * @param name * @param curLocation * @return */ - public VariantContext getVariantContext(final ReferenceContext ref, - final String name, + public VariantContext getVariantContext(final String name, final GenomeLoc curLocation) { - return getVariantContext(ref, name, curLocation, true); + return getVariantContext(name, curLocation, true); } - private void addVariantContexts(final Collection contexts, + private void addVariantContexts(final List contexts, final RODRecordList rodList, - final ReferenceContext ref, final GenomeLoc curLocation, final boolean requireStartHere, final boolean takeFirstOnly ) { + addValues("xxx", VariantContext.class, contexts, rodList, curLocation, requireStartHere, takeFirstOnly); + } + + private static List addValues(final String name, + final Class type, + final List values, + final RODRecordList rodList, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { - if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { - // ok, we might actually be able to turn this record in a variant context - final VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); + if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing + Object obj = rec.getUnderlyingObject(); + if (!(type.isAssignableFrom(obj.getClass()))) + throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString() + + " it's of type " + obj.getClass()); - if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted - continue; + values.add((T)obj); - if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing - contexts.add(vc); - - if ( takeFirstOnly ) - // we only want the first passing instance, so break the loop over records in rodList - break; - } + if ( takeFirstOnly ) + // we only want the first passing instance, so break the loop over records in rodList + break; } } + + return values; } /** * Finds the reference metadata track named 'name' and returns all ROD records from that track associated - * with the current site as a RODRecordList collection object. If no data track with specified name is available, + * with the current site as a RODRecordList List object. If no data track with specified name is available, * returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and * location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java index 232989fb0..08eb8f1d4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java @@ -65,13 +65,13 @@ public class TraverseLoci extends TraversalEngine,Locu referenceView.expandBoundsToAccomodateLoc(location); } - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation()); - // create reference context. Note that if we have a pileup of "extended events", the context will // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). ReferenceContext refContext = referenceView.getReferenceContext(location); + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + final boolean keepMeP = walker.filter(tracker, refContext, locus); if (keepMeP) { M x = walker.map(tracker, refContext, locus); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 207c4118d..932317700 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -202,7 +202,7 @@ public class VariantAnnotator extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); if ( VCs.size() == 0 ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 73249d343..9dc0bbfe6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -203,7 +203,7 @@ public class VariantAnnotatorEngine { infoAnnotations.put(VariantContext.ID_KEY, rsID); } else { boolean overlapsComp = false; - for ( VariantContext comp : tracker.getVariantContexts(ref, dbSet.getKey(), ref.getLocus(), false, false) ) { + for ( VariantContext comp : tracker.getVariantContexts(dbSet.getKey(), ref.getLocus(), false, false) ) { if ( !comp.isFiltered() ) { overlapsComp = true; break; @@ -216,7 +216,7 @@ public class VariantAnnotatorEngine { private void annotateExpressions(RefMetaDataTracker tracker, ReferenceContext ref, Map infoAnnotations) { for ( VAExpression expression : requestedExpressions ) { - Collection VCs = tracker.getVariantContexts(ref, expression.bindingName, ref.getLocus(), false, true); + Collection VCs = tracker.getVariantContexts(expression.bindingName, ref.getLocus(), false, true); if ( VCs.size() == 0 ) continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 82d2af283..78057849c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -244,7 +244,7 @@ public class GenomicAnnotator extends RodWalker implements Tre return 0; Set results = new LinkedHashSet(); - for (VariantContext vc : tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false)) { + for (VariantContext vc : tracker.getVariantContexts("variant", context.getLocation(), true, false)) { if ( (vc.isFiltered() && IGNORE_FILTERED_SITES) || (vc.isVariant() && !vc.isBiallelic()) ) { results.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 5499c99b0..19eafc872 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -119,9 +119,9 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; GenomeLoc loc = context.getLocation(); - VariantContext vc_input = tracker.getVariantContext(ref,INPUT_ROD_NAME, loc, true); + VariantContext vc_input = tracker.getVariantContext(INPUT_ROD_NAME, loc, true); - VariantContext vc_comp = tracker.getVariantContext(ref,COMP_ROD_NAME, loc, true); + VariantContext vc_comp = tracker.getVariantContext(COMP_ROD_NAME, loc, true); if ( vc_input == null ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index b20b44cd6..353ebb82a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -121,8 +121,8 @@ public class ProduceBeagleInputWalker extends RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext variant_eval = tracker.getVariantContext(ref, ROD_NAME, loc, true); - VariantContext validation_eval = tracker.getVariantContext(ref,VALIDATION_ROD_NAME, loc, true); + VariantContext variant_eval = tracker.getVariantContext(ROD_NAME, loc, true); + VariantContext validation_eval = tracker.getVariantContext(VALIDATION_ROD_NAME, loc, true); if ( goodSite(variant_eval,validation_eval) ) { if ( useValidation(validation_eval, ref) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index 41b35321e..ee3dfb1df 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -102,7 +102,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext vc = tracker.getVariantContext(ref, ROD_NAME, loc, true); + VariantContext vc = tracker.getVariantContext(ROD_NAME, loc, true); if ( ProduceBeagleInputWalker.canBeOutputToBeagle(vc) ) { // do we want to hold back this site? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index efc101618..665ac539c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -57,7 +57,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { String refBase = String.valueOf((char)ref.getBase()); - Collection vcs = tracker.getAllVariantContexts(ref); + Collection vcs = tracker.getAllVariantContexts(); // Check to see if we have a called snp for ( VariantContext vc : vcs ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 2e96fc172..61991db2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -149,7 +149,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts(INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); // is there a SNP mask present? boolean hasMask = tracker.getValues("mask").size() > 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index c76fea34f..54bb888c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -321,7 +321,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap.clear(); if (getAlleleListFromVCF) { - for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", ref.getLocus(), false, false) ) { + for( final VariantContext vc_input : tracker.getVariantContexts("alleles", ref.getLocus(), false, false) ) { if( vc_input != null && allowableTypes.contains(vc_input.getType()) && ref.getLocus().getStart() == vc_input.getStart()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 30a86249d..4f784d37a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -63,7 +63,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC VariantContext vc = null; // search for usable record - for( final VariantContext vc_input : tracker.getVariantContexts(ref, "alleles", ref.getLocus(), true, false) ) { + for( final VariantContext vc_input : tracker.getVariantContexts("alleles", ref.getLocus(), true, false) ) { if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) { if ( vc == null ) { vc = vc_input; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 6e03088e7..1c5d55225 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -96,7 +96,7 @@ public class UGCallVariants extends RodWalker { List VCs = new ArrayList(); for ( String name : trackNames ) { - Collection vc = tracker.getVariantContexts(ref, name, context.getLocation(), true, true); + Collection vc = tracker.getVariantContexts(name, context.getLocation(), true, true); VCs.addAll(vc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 488e37f26..38a1dcb8d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -110,7 +110,7 @@ public class RealignerTargetCreator extends RodWalker { @Argument(fullName = "emitOnlyMNPs", shortName = "emitOnlyMNPs", doc = "Only output MNP records; [default:false]", required = false) protected boolean emitOnlyMNPs = false; - private LinkedList rodNames = null; + private String rodName = "variant"; private GenomeLocParser locParser = null; private TreeMap> MNPstartToStops = null; // Must be TreeMap sorted by START sites! @@ -105,9 +105,6 @@ public class AnnotateMNPsWalker extends RodWalker { protected final static String REFSEQ_HAS_MULT_AA_CHANGES = "alleleHasMultAAchanges"; public void initialize() { - rodNames = new LinkedList(); - rodNames.add(VARIANT_ROD_NAME); - locParser = getToolkit().getGenomeLocParser(); MNPstartToStops = new TreeMap>(); // sorted by start sites @@ -125,8 +122,8 @@ public class AnnotateMNPsWalker extends RodWalker { hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); - writer.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples()))); + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName)); + writer.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodName).getGenotypeSamples()))); } public boolean generateExtendedEvents() { @@ -155,7 +152,7 @@ public class AnnotateMNPsWalker extends RodWalker { boolean requireStartHere = false; // see EVERY site of the MNP boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, context.getLocation(), requireStartHere, takeFirstOnly)) { + for (VariantContext vc : tracker.getVariantContexts(rodName, context.getLocation(), requireStartHere, takeFirstOnly)) { GenomeLoc vcLoc = VariantContextUtils.getLocation(locParser, vc); boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart(); boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java index a4b49b8ff..ae7782434 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -44,8 +44,8 @@ public class MergeAndMatchHaplotypes extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection pbts = tracker.getVariantContexts(ref, "pbt", ref.getLocus(), true, true); - Collection rbps = tracker.getVariantContexts(ref, "rbp", ref.getLocus(), true, true); + Collection pbts = tracker.getVariantContexts("pbt", ref.getLocus(), true, true); + Collection rbps = tracker.getVariantContexts("rbp", ref.getLocus(), true, true); VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index d36da9835..6e328c07e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -58,12 +58,9 @@ public class MergeMNPsWalker extends RodWalker { @Argument(fullName = "maxGenomicDistanceForMNP", shortName = "maxDistMNP", doc = "The maximum reference-genome distance between consecutive heterozygous sites to permit merging phased VCF records into a MNP record; [default:1]", required = false) protected int maxGenomicDistanceForMNP = 1; - private LinkedList rodNames = null; + private String rodName = "variant"; public void initialize() { - rodNames = new LinkedList(); - rodNames.add("variant"); - initializeVcfWriter(); } @@ -77,8 +74,8 @@ public class MergeMNPsWalker extends RodWalker { hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); - Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); - vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples()))); + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName)); + vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodName).getGenotypeSamples()))); } public boolean generateExtendedEvents() { @@ -103,7 +100,7 @@ public class MergeMNPsWalker extends RodWalker { boolean requireStartHere = true; // only see each VariantContext once boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(ref, rodNames, context.getLocation(), requireStartHere, takeFirstOnly)) + for (VariantContext vc : tracker.getVariantContexts(rodName, context.getLocation(), requireStartHere, takeFirstOnly)) writeVCF(vc); return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index 73d7fa529..c747e35d0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -81,12 +81,9 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker rodNames = null; + private String rodName = "variant"; public void initialize() { - rodNames = new LinkedList(); - rodNames.add("variant"); - initializeVcfWriter(); } @@ -114,8 +111,8 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); - vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples()))); + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName)); + vcMergerWriter.writeHeader(new VCFHeader(hInfo, new TreeSet(rodNameToHeader.get(rodName).getGenotypeSamples()))); } public boolean generateExtendedEvents() { @@ -140,7 +137,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection vcs = tracker.getVariantContexts(ref, ROD_NAME, context.getLocation(), true, true); + Collection vcs = tracker.getVariantContexts(ROD_NAME, context.getLocation(), true, true); for (VariantContext vc : vcs) { Map genotypeMap = vc.getGenotypes(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index f96cec1b7..165cef477 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -98,7 +98,7 @@ public class ReadBackedPhasingWalker extends RodWalker rodNames = null; + private String rodName = "variant"; public static final String PQ_KEY = "PQ"; @@ -123,9 +123,6 @@ public class ReadBackedPhasingWalker extends RodWalker(); - rodNames.add("variant"); - /* Since we cap each base quality (BQ) by its read's mapping quality (MQ) [in Read.updateBaseAndQuality()], then: if minBQ > minMQ, then we require that MQ be >= minBQ as well. @@ -175,8 +172,8 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), rodNames); - Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(rodNames.get(0)).getGenotypeSamples() : samplesToPhase); + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName)); + Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(rodName).getGenotypeSamples() : samplesToPhase); writer.writeHeader(new VCFHeader(hInfo, samples)); } @@ -209,7 +206,7 @@ public class ReadBackedPhasingWalker extends RodWalker { // if the argument was set, check for equivalence if (allRecordsVariantContextEquivalent && tracker != null) { - Collection col = tracker.getAllVariantContexts(ref); + Collection col = tracker.getAllVariantContexts(); VariantContext con = null; for (VariantContext contextInList : col) if (con == null) con = contextInList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 403401192..3d09ef785 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -138,8 +138,8 @@ public class ValidationAmplicons extends RodWalker { // step 3 (or 1 if not new): // build up the sequence - VariantContext mask = tracker.getVariantContext(ref,"MaskAlleles",ref.getLocus()); - VariantContext validate = tracker.getVariantContext(ref,"ValidateAlleles",ref.getLocus()); + VariantContext mask = tracker.getVariantContext("MaskAlleles",ref.getLocus()); + VariantContext validate = tracker.getVariantContext("ValidateAlleles",ref.getLocus()); if ( mask == null && validate == null ) { if ( indelCounter > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 5bdec837e..5e98350ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -25,7 +25,7 @@ public class Novelty extends VariantStratifier implements StandardStratification public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if (tracker != null && eval != null) { for (final String knownName : knownNames) { - final Collection knownComps = tracker.getVariantContexts(ref, knownName, ref.getLocus(), true, false); + final Collection knownComps = tracker.getVariantContexts(knownName, ref.getLocus(), true, false); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval if ( eval.getType() == c.getType() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 7beb94593..285e75ed8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -336,7 +336,7 @@ public class VariantEvalUtils { for (String trackName : trackNames) { HashMap vcs = new HashMap(); - Collection contexts = tracker == null ? null : tracker.getVariantContexts(ref, trackName, ref.getLocus(), true, true); + Collection contexts = tracker == null ? null : tracker.getVariantContexts(trackName, ref.getLocus(), true, true); VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null; // First, filter the VariantContext to represent only the samples for evaluation diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 5fb738944..b195256d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -168,7 +168,7 @@ public class ApplyRecalibration extends RodWalker { return 1; } - for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, context.getLocation(), true, false) ) { + for( VariantContext vc : tracker.getVariantContexts(inputNames, context.getLocation(), true, false) ) { if( vc != null ) { if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { String filterString = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index c76efc2dd..7f32882f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -258,7 +258,7 @@ public class VariantDataManager { datum.consensusCount = 0; for( final TrainingSet trainingSet : trainingSets ) { - for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, context.getLocation(), false, false ) ) { + for( final VariantContext trainVC : tracker.getVariantContexts(trainingSet.name, context.getLocation(), false, false ) ) { if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 7e1d931c9..497a02baf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -163,7 +163,7 @@ public class VariantRecalibrator extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation(), true, false); + Collection vcs = tracker.getAllVariantContexts(context.getLocation(), true, false); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 97cb9a6e3..a55a53ff0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -85,7 +85,7 @@ public class FilterLiftedVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); for ( VariantContext vc : VCs ) filterAndWrite(ref.getBases(), vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 566333d3f..01c7ddc91 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -68,7 +68,7 @@ public class LeftAlignVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); int changedSites = 0; for ( VariantContext vc : VCs ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 061c3b256..38ac1e013 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -143,7 +143,7 @@ public class LiftoverVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); for ( VariantContext vc : VCs ) convertAndWrite(vc, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 99e12c836..de194e93f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -97,7 +97,7 @@ public class RandomlySplitVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); + Collection vcs = tracker.getVariantContexts(INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); for ( VariantContext vc : vcs ) { int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); if ( random < iFraction ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 0efed393a..3bc598e2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -25,38 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; -import java.lang.annotation.AnnotationFormatError; import java.util.*; /** @@ -317,7 +308,7 @@ public class SelectVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getVariantContexts(ref, variantRodName, context.getLocation(), true, false); + Collection vcs = tracker.getVariantContexts(variantRodName, context.getLocation(), true, false); if ( vcs == null || vcs.size() == 0) { return 0; @@ -345,12 +336,12 @@ public class SelectVariants extends RodWalker { break; } if (DISCORDANCE_ONLY) { - Collection compVCs = tracker.getVariantContexts(ref, discordanceRodName, context.getLocation(), true, false); + Collection compVCs = tracker.getVariantContexts(discordanceRodName, context.getLocation(), true, false); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { - Collection compVCs = tracker.getVariantContexts(ref, concordanceRodName, context.getLocation(), true, false); + Collection compVCs = tracker.getVariantContexts(concordanceRodName, context.getLocation(), true, false); if (!isConcordant(vc, compVCs)) return 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 756a00731..3bf8bd9e9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -86,7 +86,7 @@ public class ValidateVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(ref, "variant", context.getLocation(), true, false); + Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); for ( VariantContext vc : VCs ) validate(vc, tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 86bb3b0e8..e8a012e99 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -93,7 +93,7 @@ public class VariantValidationAssessor extends RodWalker { return 0; if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); + Collection vcs = tracker.getAllVariantContexts(context.getLocation()); for ( VariantContext vc : vcs) { if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java index 1b913e895..f5db48f0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -138,7 +138,7 @@ public class VariantsToTableNewRodStyle extends RodWalker { System.out.printf("VariantList binding %s tags=%s%n", binding, getToolkit().getTags(binding).getPositionalTags()); if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - VariantContext vc = variants.getVariantContext(tracker, ref, context.getLocation()); + VariantContext vc = variants.getVariantContext(tracker, context.getLocation()); if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); out.println(Utils.join("\t", vals)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 8f0fdd907..170daf6cc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -162,7 +162,7 @@ public class VariantsToVCF extends RodWalker { } // for everything else, we can just convert to VariantContext - return tracker.getVariantContexts(ref, INPUT_ROD_NAME, ref.getLocus(), true, false); + return tracker.getVariantContexts(INPUT_ROD_NAME, ref.getLocus(), true, false); } private DbSNPFeature getDbsnpFeature(String rsID) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 21be24a85..52d8fd4d0 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -69,7 +69,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.emptyList()); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10), null); Assert.assertEquals(tracker.getAllValuesAsGATKFeatures().size(), 0, "The tracker should not have produced any data"); } @@ -87,7 +87,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null); TableFeature datum = tracker.getFirstValue("tableTest", TableFeature.class); Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect"); @@ -113,7 +113,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2)); ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); - RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20)); + RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null); TableFeature datum1 = tracker.getFirstValue("tableTest1", TableFeature.class); Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect"); From c0d4110ffd2ccf75e28866d9b6f5b98161683148 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 29 Jul 2011 10:01:11 -0400 Subject: [PATCH 056/186] Correcting redundant warning text. --- .../genotyper/SNPGenotypeLikelihoodsCalculationModel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 3e3cd128b..9d917078d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -68,7 +68,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC if ( vc == null ) { vc = vc_input; } else { - logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record only"); + logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record"); } } } From 3b799db61aa276a7c5191e260e61d690744de4e9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 29 Jul 2011 13:23:17 -0400 Subject: [PATCH 057/186] RefMetaDataTracker cleanup and unit tests You know have to provide an explicit list of RODRecordLists upfront to the constructor. RefMetaDataTracker is now immutable. Changes in engine to incorporate these differences Extensive UnitTests for RefMetaDataTracker now. --- .../ManagingReferenceOrderedView.java | 10 +- .../datasources/providers/RodLocusView.java | 13 +- .../gatk/refdata/RefMetaDataTracker.java | 255 +++++++++------- .../refdata/RefMetaDataTrackerUnitTest.java | 275 ++++++++++++++++++ 4 files changed, 441 insertions(+), 112 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java index f75f358e1..d065635c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/ManagingReferenceOrderedView.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import java.util.ArrayList; @@ -51,10 +52,13 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView { * @return A tracker containing information about this locus. */ public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { - RefMetaDataTracker tracks = new RefMetaDataTracker(states.size(), referenceContext); + List bindings = states.isEmpty() ? Collections.emptyList() : new ArrayList(states.size()); + for ( ReferenceOrderedDataState state: states ) - tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) ); - return tracks; + // todo -- warning, I removed the reference to the name from states + bindings.add( state.iterator.seekForward(loc) ); + + return new RefMetaDataTracker(bindings, referenceContext); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java index 3db5bd19a..c38b09334 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RodLocusView.java @@ -96,21 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView { } rodQueue = new RODMergingIterator(iterators); - - //throw new StingException("RodLocusView currently disabled"); } public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) { - RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size(), referenceContext); - for ( RODRecordList track : allTracksHere ) { - if ( ! t.hasValues(track.getName()) ) - t.bind(track.getName(), track); - } - // special case the interval again -- add it into the ROD - if ( interval != null ) { t.bind(interval.getName(), interval); } - - return t; + if ( interval != null ) { allTracksHere.add(interval); } + return new RefMetaDataTracker(allTracksHere, referenceContext); } public boolean hasNext() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 808ad5430..c47accb00 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -29,18 +29,13 @@ import java.util.*; * Time: 3:05:23 PM */ public class RefMetaDataTracker { + // TODO: this should be a list, not a map, actually + + private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY"); + final Map map; final ReferenceContext ref; - protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); - - public RefMetaDataTracker(int nBindings, ReferenceContext ref) { - this.ref = ref; - if ( nBindings == 0 ) - map = Collections.emptyMap(); - else - map = new HashMap(nBindings); - } - + final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); // ------------------------------------------------------------------------------------------ // @@ -50,18 +45,18 @@ public class RefMetaDataTracker { // // ------------------------------------------------------------------------------------------ - /** - * Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal - * system to provide access to RMDs in a structured way to the walkers. - * - * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE - * - * @param name the name of the track - * @param rod the collection of RMD data - */ - public void bind(final String name, RODRecordList rod) { - //logger.debug(String.format("Binding %s to %s", name, rod)); - map.put(canonicalName(name), maybeConvertToVariantContext(rod)); + public RefMetaDataTracker(final Collection allBindings, final ReferenceContext ref) { + this.ref = ref; + if ( allBindings.isEmpty() ) + map = Collections.emptyMap(); + else { + map = new HashMap(allBindings.size()); + for ( RODRecordList rod : allBindings ) { + //logger.debug(String.format("Binding %s to %s", name, rod)); + if ( rod != null ) + map.put(canonicalName(rod.getName()), maybeConvertToVariantContext(rod)); + } + } } /** @@ -81,24 +76,13 @@ public class RefMetaDataTracker { final VariantContext vc = VariantContextAdaptors.toVariantContext(bindings.getName(), rec.getUnderlyingObject(), ref); if ( vc != null ) // it's possible that the conversion failed, but we continue along anyway values.add(new GATKFeature.TribbleGATKFeature(ref.getGenomeLocParser(), vc, rec.getName())); - } + } else + values.add(rec); } return new RODRecordListImpl(bindings.getName(), values, bindings.getLocation()); } -// /** -// * Temporary setting for putting a reference context into the system. -// * -// * DO NOT USE THIS FUNCTION UNLESS YOU ARE THE GATK ENGINE -// * -// * @param ref -// */ -// public void setRef(final ReferenceContext ref) { -// this.ref = ref; -// } - - // ------------------------------------------------------------------------------------------ // // @@ -107,58 +91,47 @@ public class RefMetaDataTracker { // // ------------------------------------------------------------------------------------------ - /** - * No-assumption version of getValues(name, class). Returns Objects. - */ - public List getValues(final String name) { - return getValues(name, Object.class); + public List getValues(Class type) { + return addValues(map.keySet(), type, new ArrayList(), null, false, false); + } + public List getValues(Class type, final GenomeLoc onlyAtThisLoc) { + return addValues(map.keySet(), type, new ArrayList(), onlyAtThisLoc, true, false); + } + public List getValues(Class type, final String name) { + return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false); + } + public List getValues(Class type, final String name, final GenomeLoc onlyAtThisLoc) { + return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false); + } + public List getValues(Class type, final Collection names) { + return addValues(names, type, new ArrayList(), null, false, false); + } + public List getValues(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + return addValues(names, type, new ArrayList(), onlyAtThisLoc, true, false); } - /** - * get all the reference meta data associated with a track name. - * @param name the name of the track we're looking for - * @param clazz the expected class of the elements bound to rod name - * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a - * dbSNP RMD this will be a RodDbSNP, etc. - * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! - */ - public List getValues(final String name, final Class clazz) { - RODRecordList list = getTrackDataByName(name); - - if (list == null) - return Collections.emptyList(); - else { - return addValues(name, clazz, new ArrayList(), list, list.getLocation(), false, false); - } + public T getFirstValue(Class type) { + return safeGetFirst(getValues(type)); + } + public T getFirstValue(Class type, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(getValues(type, onlyAtThisLoc)); + } + public T getFirstValue(Class type, final String name) { + return safeGetFirst(getValues(type, name)); + } + public T getFirstValue(Class type, final String name, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(getValues(type, name, onlyAtThisLoc)); + } + public T getFirstValue(Class type, final Collection names) { + return safeGetFirst(getValues(type, names)); + } + public T getFirstValue(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(getValues(type, names, onlyAtThisLoc)); } - - /** - * get a singleton record, given the name and a type. This function will return the first record at the - * current position seen. The object is cast into a type clazz, or thoses an error if this isn't possible. - * - * * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are - * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets - * picked may change from time to time! BE WARNED! - * - * @param name the name of the track - * @param clazz the underlying type to return - * @param the type to parameterize on, matching the clazz argument - * @return a record of type T, or null if no record is present. - */ - public T getFirstValue(final String name, final Class clazz) { - RODRecordList objects = getTrackDataByName(name); - - // if empty or null return null; - if (objects == null || objects.size() < 1) return null; - - Object obj = objects.get(0).getUnderlyingObject(); - if (!(clazz.isAssignableFrom(obj.getClass()))) - throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() - + " it's of type " + obj.getClass()); - else - return (T)obj; + final private T safeGetFirst(List l) { + // todo: should we be warning people here? Throwing an error? + return l.isEmpty() ? null : l.get(0); } /** @@ -195,8 +168,7 @@ public class RefMetaDataTracker { * Important: The list returned by this function is guaranteed not to be null, but may be empty! */ public List getValuesAsGATKFeatures(final String name) { - List feat = getTrackDataByName(name); - return (feat == null) ? new ArrayList() : feat; // to satisfy the above requirement that we don't return null + return getTrackDataByName(name); } /** @@ -209,7 +181,7 @@ public class RefMetaDataTracker { LinkedList bound = new LinkedList(); for ( RODRecordList value : map.values() ) { - if ( value != null && value.size() != 0 ) bound.add(value); + if ( value.size() != 0 ) bound.add(value); } return bound; @@ -222,13 +194,79 @@ public class RefMetaDataTracker { public int getNumberOfTracksWithValue() { int n = 0; for ( RODRecordList value : map.values() ) { - if ( value != null && ! value.isEmpty() ) { + if ( ! value.isEmpty() ) { n++; } } return n; } + // ------------------------------------------------------------------------------------------ + // + // + // old style Generic accessors + // + // TODO -- DELETE ME + // + // + // ------------------------------------------------------------------------------------------ + + /** + * No-assumption version of getValues(name, class). Returns Objects. + */ + @Deprecated + public List getValues(final String name) { + return getValues(name, Object.class); + } + + /** + * get all the reference meta data associated with a track name. + * @param name the name of the track we're looking for + * @param clazz the expected class of the elements bound to rod name + * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a + * dbSNP RMD this will be a RodDbSNP, etc. + * + * Important: The list returned by this function is guaranteed not to be null, but may be empty! + */ + @Deprecated + public List getValues(final String name, final Class clazz) { + RODRecordList list = getTrackDataByName(name); + + if (list.isEmpty()) + return Collections.emptyList(); + else { + return addValues(name, clazz, new ArrayList(), list, list.getLocation(), false, false); + } + } + + + /** + * get a singleton record, given the name and a type. This function will return the first record at the + * current position seen. The object is cast into a type clazz, or thoses an error if this isn't possible. + * + * * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are + * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets + * picked may change from time to time! BE WARNED! + * + * @param name the name of the track + * @param clazz the underlying type to return + * @param the type to parameterize on, matching the clazz argument + * @return a record of type T, or null if no record is present. + */ + @Deprecated + public T getFirstValue(final String name, final Class clazz) { + RODRecordList objects = getTrackDataByName(name); + + if (objects.isEmpty()) return null; + + Object obj = objects.get(0).getUnderlyingObject(); + if (!(clazz.isAssignableFrom(obj.getClass()))) + throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() + + " it's of type " + obj.getClass()); + else + return (T)obj; + } + // ------------------------------------------------------------------------------------------ // // @@ -244,6 +282,7 @@ public class RefMetaDataTracker { * * @return variant context */ + @Deprecated public List getAllVariantContexts() { return getAllVariantContexts(null, false, false); } @@ -254,6 +293,7 @@ public class RefMetaDataTracker { * @param curLocation * @return */ + @Deprecated public List getAllVariantContexts(final GenomeLoc curLocation) { return getAllVariantContexts(curLocation, true, false); } @@ -275,6 +315,7 @@ public class RefMetaDataTracker { * @param takeFirstOnly do we take the first rod only? * @return variant context */ + @Deprecated public List getAllVariantContexts(final GenomeLoc curLocation, final boolean requireStartHere, final boolean takeFirstOnly) { @@ -299,6 +340,7 @@ public class RefMetaDataTracker { * @param takeFirstOnly do we take the first rod only? * @return variant context */ + @Deprecated public List getVariantContexts(final String name, final GenomeLoc curLocation, final boolean requireStartHere, @@ -306,6 +348,7 @@ public class RefMetaDataTracker { return getVariantContexts(Arrays.asList(name), curLocation, requireStartHere, takeFirstOnly); } + @Deprecated public List getVariantContexts(final Collection names, final GenomeLoc curLocation, final boolean requireStartHere, @@ -314,9 +357,7 @@ public class RefMetaDataTracker { for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match - - if ( rodList != null ) - addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly ); + addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly ); } return contexts; @@ -332,6 +373,7 @@ public class RefMetaDataTracker { * @param requireStartHere do we require the rod to start at this location? * @return variant context */ + @Deprecated public VariantContext getVariantContext(final String name, final GenomeLoc curLocation, final boolean requireStartHere) { @@ -354,11 +396,13 @@ public class RefMetaDataTracker { * @param curLocation * @return */ + @Deprecated public VariantContext getVariantContext(final String name, final GenomeLoc curLocation) { return getVariantContext(name, curLocation, true); } + @Deprecated private void addVariantContexts(final List contexts, final RODRecordList rodList, final GenomeLoc curLocation, @@ -367,13 +411,27 @@ public class RefMetaDataTracker { addValues("xxx", VariantContext.class, contexts, rodList, curLocation, requireStartHere, takeFirstOnly); } - private static List addValues(final String name, - final Class type, - final List values, - final RODRecordList rodList, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { + private List addValues(final Collection names, + final Class type, + final List values, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { + for ( String name : names ) { + RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match + addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); + } + + return values; + } + + private List addValues(final String name, + final Class type, + final List values, + final RODRecordList rodList, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing Object obj = rec.getUnderlyingObject(); @@ -406,7 +464,8 @@ public class RefMetaDataTracker { */ private RODRecordList getTrackDataByName(final String name) { final String luName = canonicalName(name); - return map.get(luName); + RODRecordList l = map.get(luName); + return l == null ? EMPTY_ROD_RECORD_LIST : l; } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java new file mode 100644 index 000000000..7ae1ed3be --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import net.sf.samtools.SAMFileHeader; +import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broad.tribble.dbsnp.DbSNPCodec; +import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.*; +import java.util.*; +import java.util.List; + +public class RefMetaDataTrackerUnitTest { + final protected static Logger logger = Logger.getLogger(RefMetaDataTrackerUnitTest.class); + private static SAMFileHeader header; + private ReferenceContext context; + private GenomeLocParser genomeLocParser; + private GenomeLoc locus; + private final static int START_POS = 10; + Allele A,C,G,T; + VariantContext AC_SNP, AG_SNP, AT_SNP; + TableFeature span10_10, span1_20, span10_20; + DbSNPFeature dbsnp1, dbsnp2; + + @BeforeClass + public void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + locus = genomeLocParser.createGenomeLoc("chr1", START_POS, START_POS); + context = new ReferenceContext(genomeLocParser, locus, (byte)'A'); + A = Allele.create("A", true); + C = Allele.create("C"); + G = Allele.create("G"); + T = Allele.create("T"); + AC_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, C)); + AG_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, G)); + AT_SNP = new VariantContext("x", "chr1", START_POS, START_POS, Arrays.asList(A, T)); + span10_10 = makeSpan(10, 10); + span1_20 = makeSpan(1, 20); + span10_20 = makeSpan(10, 20); + + // dbsnp records + DbSNPCodec dbsnpCodec = new DbSNPCodec(); + String line1 = Utils.join("\t", "585 chr1 9 9 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1".split(" +")); + String line2 = Utils.join("\t", "585 chr1 9 10 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1".split(" +")); + dbsnp1 = (DbSNPFeature)dbsnpCodec.decode(line1); + dbsnp2 = (DbSNPFeature)dbsnpCodec.decode(line2); + } + + private class MyTest extends BaseTest.TestDataProvider { + public RODRecordList AValues, BValues; + + private MyTest(Class c, final List AValues, final List BValues) { + super(c); + this.AValues = AValues == null ? null : makeRODRecord("A", AValues); + this.BValues = BValues == null ? null : makeRODRecord("B", BValues); + } + + private MyTest(final List AValues, final List BValues) { + super(MyTest.class); + this.AValues = AValues == null ? null : makeRODRecord("A", AValues); + this.BValues = BValues == null ? null : makeRODRecord("B", BValues); + } + + private final RODRecordList makeRODRecord(String name, List features) { + List x = new ArrayList(); + for ( Feature f : features ) + x.add(new GATKFeature.TribbleGATKFeature(genomeLocParser, f, name)); + return new RODRecordListImpl(name, x, locus); + } + + public List expected(String name) { + if ( name.equals("A+B") ) return allValues(); + if ( name.equals("A") ) return expectedAValues(); + if ( name.equals("B") ) return expectedBValues(); + throw new RuntimeException("FAIL"); + } + + public List allValues() { + List x = new ArrayList(); + x.addAll(expectedAValues()); + x.addAll(expectedBValues()); + return x; + } + + public List expectedAValues() { + return AValues == null ? Collections.emptyList() : AValues; + } + + public List expectedBValues() { + return BValues == null ? Collections.emptyList() : BValues; + } + + public RefMetaDataTracker makeTracker() { + List x = new ArrayList(); + if ( AValues != null ) x.add(AValues); + if ( BValues != null ) x.add(BValues); + return new RefMetaDataTracker(x, context); + } + + public int nBoundTracks() { + int n = 0; + if ( AValues != null ) n++; + if ( BValues != null ) n++; + return n; + } + } + + private class MyTestAdaptors extends MyTest { + private MyTestAdaptors(final List AValues) { + super(MyTestAdaptors.class, AValues, null); + } + } + + private final TableFeature makeSpan(int start, int stop) { + return new TableFeature(genomeLocParser.createGenomeLoc("chr1", start, stop), + Collections.emptyList(), Collections.emptyList()); + } + + @DataProvider(name = "tests") + public Object[][] createTests() { + new MyTest(null, null); + new MyTest(Arrays.asList(AC_SNP), null); + new MyTest(Arrays.asList(AC_SNP, AT_SNP), null); + new MyTest(Arrays.asList(AC_SNP), Arrays.asList(AG_SNP)); + new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(AG_SNP)); + new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10)); + new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10, span10_20)); + new MyTest(Arrays.asList(AC_SNP, AT_SNP), Arrays.asList(span10_10, span10_20, span1_20)); + + // for requires starts + new MyTest(Arrays.asList(span1_20), null); + new MyTest(Arrays.asList(span10_10, span10_20), null); + new MyTest(Arrays.asList(span10_10, span10_20, span1_20), null); + + return MyTest.getTests(MyTest.class); + } + + @Test(enabled = true, dataProvider = "tests") + public void testRawBindings(MyTest test) { + logger.warn("Testing " + test + " for number of bound tracks"); + RefMetaDataTracker tracker = test.makeTracker(); + Assert.assertEquals(tracker.getNumberOfTracksWithValue(), test.nBoundTracks()); + + testSimpleBindings("A", tracker, test.AValues); + testSimpleBindings("B", tracker, test.BValues); + } + + private void testSimpleBindings(String name, RefMetaDataTracker tracker, RODRecordList expected) { + List asValues = tracker.getValues(Feature.class, name); + List asFeatures = tracker.getValuesAsGATKFeatures(name); + + Assert.assertEquals(tracker.hasValues(name), expected != null); + Assert.assertEquals(asFeatures.size(), expected == null ? 0 : expected.size()); + Assert.assertEquals(asValues.size(), expected == null ? 0 : expected.size()); + + if ( expected != null ) { + for ( GATKFeature e : expected ) { + boolean foundFeature = false; + for ( GATKFeature f : asFeatures ) { + if ( e.getUnderlyingObject() == f.getUnderlyingObject() ) foundFeature = true; + } + Assert.assertTrue(foundFeature, "Never found expected GATKFeature " + e + " bound to " + name + " in " + tracker); + + boolean foundValue = false; + for ( Feature f : asValues ) { + if ( e.getUnderlyingObject() == f ) foundValue = true; + } + Assert.assertTrue(foundValue, "Never found expected value of " + e.getUnderlyingObject() + " bound to " + name + " in " + tracker); + } + } + } + + @Test(enabled = true, dataProvider = "tests") + public void testGetters(MyTest test) { + logger.warn("Testing " + test + " for getFirst() methods"); + RefMetaDataTracker tracker = test.makeTracker(); + + for ( String name : Arrays.asList("A+B", "A", "B") ) { + List v1 = name.equals("A+B") ? tracker.getValues(Feature.class) : tracker.getValues(Feature.class, name); + testGetter(name, v1, test.expected(name), true, tracker); + + List v2 = name.equals("A+B") ? tracker.getValues(Feature.class, locus) : tracker.getValues(Feature.class, name, locus); + testGetter(name, v2, startingHere(test.expected(name)), true, tracker); + + Feature v3 = name.equals("A+B") ? tracker.getFirstValue(Feature.class) : tracker.getFirstValue(Feature.class, name); + testGetter(name, Arrays.asList(v3), test.expected(name), false, tracker); + + Feature v4 = name.equals("A+B") ? tracker.getFirstValue(Feature.class, locus) : tracker.getFirstValue(Feature.class, name, locus); + testGetter(name, Arrays.asList(v4), startingHere(test.expected(name)), false, tracker); + } + } + + private List startingHere(List l) { + List x = new ArrayList(); + for ( GATKFeature f : l ) if ( f.getStart() == locus.getStart() ) x.add(f); + return x; + } + + private void testGetter(String name, List got, List expected, boolean requireExact, RefMetaDataTracker tracker) { + if ( got.size() == 1 && got.get(0) == null ) + got = Collections.emptyList(); + + if ( requireExact ) + Assert.assertEquals(got.size(), expected.size()); + + boolean foundAny = false; + for ( GATKFeature e : expected ) { + boolean found1 = false; + for ( Feature got1 : got ) { + if ( e.getUnderlyingObject() == got1 ) + found1 = true; + } + if ( requireExact ) + Assert.assertTrue(found1, "Never found expected GATKFeature " + e + " bound to " + name + " in " + tracker); + foundAny = found1 || foundAny; + } + + if ( ! requireExact && ! expected.isEmpty() ) + Assert.assertTrue(foundAny, "Never found any got values matching one of the expected values bound to " + name + " in " + tracker); + } + + @Test(enabled = true, dataProvider = "testAdaptors") + public void testAdaptors(MyTestAdaptors test) { + logger.warn("Testing " + test + " for number of bound tracks"); + RefMetaDataTracker tracker = test.makeTracker(); + Assert.assertEquals(tracker.getNumberOfTracksWithValue(), test.nBoundTracks()); + + // all of the objects should be of type VariantContext + for ( Feature v : tracker.getValues(Feature.class) ) + Assert.assertEquals(v.getClass(), VariantContext.class, "Conversion failed from dbsnp to variant context in RefMetaDataTracker"); + } + + @DataProvider(name = "testAdaptors") + public Object[][] createTestAdaptors() { + new MyTestAdaptors(Arrays.asList(dbsnp1)); + new MyTestAdaptors(Arrays.asList(dbsnp1, dbsnp2)); + return MyTestAdaptors.getTests(MyTestAdaptors.class); + } +} From 6acb4aad3badccec6d187c50a6f4bc058c65da92 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 29 Jul 2011 14:37:12 -0400 Subject: [PATCH 058/186] RodBinding are properly generic now. VariantContextRodBinding removed, as RodBinding is the right style now. --- .../commandline/ArgumentTypeDescriptor.java | 17 +++- .../sting/commandline/RodBinding.java | 33 +++++--- .../commandline/VariantContextRodBinding.java | 78 ------------------- .../VariantsToTableNewRodStyle.java | 13 ++-- 4 files changed, 44 insertions(+), 97 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 2e5cb4d62..5106dccb7 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.commandline; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplexer; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -299,10 +300,10 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { - // TODO: determine type of internal value via Parameter - Constructor ctor = type.getConstructor(Class.class, String.class, String.class, ParsingEngine.class); - RodBinding result = (RodBinding)ctor.newInstance(null, source.field.getName(), value, parsingEngine); Tags tags = getArgumentTags(matches); + Constructor ctor = type.getConstructor(Class.class, String.class, String.class, Tags.class); + Class parameterType = getParameterizedTypeClass(source.field.getGenericType()); + RodBinding result = (RodBinding)ctor.newInstance(parameterType, source.field.getName(), value, tags); parsingEngine.addTags(result,tags); return result; } catch (InvocationTargetException e) { @@ -315,6 +316,16 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { value, source.field.getName())); } } + + private Class getParameterizedTypeClass(Type t) { + if ( t instanceof ParameterizedType ) { + ParameterizedType parameterizedType = (ParameterizedType)t; + if ( parameterizedType.getActualTypeArguments().length != 1 ) + throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t); + return (Class)parameterizedType.getActualTypeArguments()[0]; + } else + throw new ReviewedStingException("BUG: could not find generic type on class " + t); + } } /** diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index d7d086824..b39e0d7ba 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -27,7 +27,10 @@ package org.broadinstitute.sting.commandline; import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.utils.GenomeLoc; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; /** @@ -38,14 +41,14 @@ import java.util.List; public class RodBinding { final String variableName; final String source; - final ParsingEngine parser; + final Tags tags; final Class type; - protected RodBinding(Class type, final String variableName, final String source, final ParsingEngine parser) { + public RodBinding(Class type, final String variableName, final String source, final Tags tags) { this.type = type; this.variableName = variableName; this.source = source; - this.parser = parser; + this.tags = tags; } public String getVariableName() { @@ -56,16 +59,26 @@ public class RodBinding { return source; } + // ------------------------------------------------------------------------------------------ + // + // + // Accessors should be kept in sync with RefMetaDataTracker + // + // + // ------------------------------------------------------------------------------------------ + public List getValues(final RefMetaDataTracker tracker) { - return tracker.getValues(variableName, type); + return tracker.getValues(type, getVariableName()); + } + public List getValues(final RefMetaDataTracker tracker, final GenomeLoc onlyAtThisLoc) { + return tracker.getValues(type, getVariableName(), onlyAtThisLoc); } -// public List getValues(final RefMetaDataTracker tracker, final Class clazz) { -// return tracker.getValues(variableName, clazz); -// } - public T getFirstValue(final RefMetaDataTracker tracker) { - return tracker.getFirstValue(variableName, type); + return tracker.getFirstValue(type, getVariableName()); + } + public T getFirstValue(final RefMetaDataTracker tracker, final GenomeLoc onlyAtThisLoc) { + return tracker.getFirstValue(type, getVariableName(), onlyAtThisLoc); } public boolean hasValues(final RefMetaDataTracker tracker) { @@ -77,7 +90,7 @@ public class RodBinding { } public Tags getTags() { - return parser.getTags(this); + return tags; } public String toString() { diff --git a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java deleted file mode 100644 index a1bc05ef6..000000000 --- a/public/java/src/org/broadinstitute/sting/commandline/VariantContextRodBinding.java +++ /dev/null @@ -1,78 +0,0 @@ -/* -* Copyright (c) 2011, The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -* OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.commandline; - -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.Collection; - -/** -* A RodBinding representing a walker argument that gets bound to a ROD track containing VariantContexts -*/ -public class VariantContextRodBinding extends RodBinding { - /** - * Create a new RodBinding specialized to provide VariantContexts. - * @param variableName the name of the field in the walker that we will bind the ROD track too - * @param sourceFile the data source from which we will read the VCs - * @param parser the Engine parser used to obtain information about this argument, such as its underlying file type - */ - protected VariantContextRodBinding(final String variableName, final String sourceFile, final ParsingEngine parser) { - super(VariantContext.class, variableName, sourceFile, parser); - } - - /** - * Forwarding method to identical tracker method - */ - public Collection getVariantContexts(final RefMetaDataTracker tracker, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { - return tracker.getVariantContexts(variableName, curLocation, requireStartHere, takeFirstOnly); - } - - /** - * Forwarding method to identical tracker method - * @param tracker - * @param curLocation - * @param requireStartHere - * @return - */ - public VariantContext getVariantContext(final RefMetaDataTracker tracker, - final GenomeLoc curLocation, - final boolean requireStartHere ) { - return tracker.getVariantContext(variableName, curLocation, requireStartHere); - } - - /** - * Forwarding method to identical tracker method - */ - public VariantContext getVariantContext(final RefMetaDataTracker tracker, - final GenomeLoc curLocation) { - return tracker.getVariantContext(variableName, curLocation); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java index f5db48f0d..a7955f1fb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -62,10 +63,10 @@ public class VariantsToTableNewRodStyle extends RodWalker { public boolean ALLOW_MISSING_DATA = false; @Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true) - public VariantContextRodBinding variants; + public RodBinding variants; - @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) - public List variantsList; +// @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) +// public List> variantsList; public void initialize() { out.println(Utils.join("\t", fieldsToTake)); @@ -134,11 +135,11 @@ public class VariantsToTableNewRodStyle extends RodWalker { if ( tracker == null ) // RodWalkers can make funky map calls return 0; - for ( RodBinding binding : variantsList ) - System.out.printf("VariantList binding %s tags=%s%n", binding, getToolkit().getTags(binding).getPositionalTags()); +// for ( RodBinding binding : variantsList ) +// System.out.printf("VariantList binding %s tags=%s%n", binding, getToolkit().getTags(binding).getPositionalTags()); if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - VariantContext vc = variants.getVariantContext(tracker, context.getLocation()); + VariantContext vc = variants.getFirstValue(tracker, context.getLocation()); if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); out.println(Utils.join("\t", vals)); From a6691ab2fd1b60bc3cbab6931e707d796910bb4f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 29 Jul 2011 16:11:22 -0400 Subject: [PATCH 059/186] List> now working (sort of). At least the argument parsing system tolerates it. --- .../commandline/ArgumentTypeDescriptor.java | 94 +++++++++++-------- .../sting/gatk/CommandLineExecutable.java | 2 - .../OutputStreamArgumentTypeDescriptor.java | 5 +- .../SAMFileReaderArgumentTypeDescriptor.java | 3 +- .../SAMFileWriterArgumentTypeDescriptor.java | 3 +- .../VCFWriterArgumentTypeDescriptor.java | 3 +- .../VariantsToTableNewRodStyle.java | 8 +- .../gatk/GATKExtensionsGenerator.java | 2 - 8 files changed, 67 insertions(+), 53 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 5106dccb7..134fe0e49 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -111,7 +111,7 @@ public abstract class ArgumentTypeDescriptor { * @return The parsed object. */ public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) { - return parse(parsingEngine, source, source.field.getType(), matches); + return parse(parsingEngine, source, source.field.getGenericType(), matches); } /** @@ -133,18 +133,18 @@ public abstract class ArgumentTypeDescriptor { protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) { Annotation argumentAnnotation = getArgumentAnnotation(source); return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation), - source.field.getType(), - ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()), - ArgumentDefinition.getShortName(argumentAnnotation), - ArgumentDefinition.getDoc(argumentAnnotation), - source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(), - source.isFlag(), - source.isMultiValued(), - source.isHidden(), - getCollectionComponentType(source.field), - ArgumentDefinition.getExclusiveOf(argumentAnnotation), - ArgumentDefinition.getValidationRegex(argumentAnnotation), - getValidOptions(source) ); + source.field.getType(), + ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()), + ArgumentDefinition.getShortName(argumentAnnotation), + ArgumentDefinition.getDoc(argumentAnnotation), + source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(), + source.isFlag(), + source.isMultiValued(), + source.isHidden(), + makeRawTypeIfNecessary(getCollectionComponentType(source.field)), + ArgumentDefinition.getExclusiveOf(argumentAnnotation), + ArgumentDefinition.getValidationRegex(argumentAnnotation), + getValidOptions(source) ); } /** @@ -153,7 +153,7 @@ public abstract class ArgumentTypeDescriptor { * @return The parameterized component type, or String.class if the parameterized type could not be found. * @throws IllegalArgumentException If more than one parameterized type is found on the field. */ - protected Class getCollectionComponentType( Field field ) { + protected Type getCollectionComponentType( Field field ) { return null; } @@ -164,7 +164,7 @@ public abstract class ArgumentTypeDescriptor { * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection. * @return The individual parsed object matching the argument match with Class type. */ - public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ); + public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ); /** * If the argument source only accepts a small set of options, populate the returned list with @@ -275,6 +275,18 @@ public abstract class ArgumentTypeDescriptor { public static boolean isArgumentHidden(Field field) { return field.isAnnotationPresent(Hidden.class); } + + public Class makeRawTypeIfNecessary(Type t) { + if ( t == null ) + return null; + else if ( t instanceof ParameterizedType ) + return (Class)((ParameterizedType) t).getRawType(); + else if ( t instanceof Class ) { + return (Class)t; + } else { + throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t); + } + } } /** @@ -296,13 +308,13 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { Tags tags = getArgumentTags(matches); - Constructor ctor = type.getConstructor(Class.class, String.class, String.class, Tags.class); - Class parameterType = getParameterizedTypeClass(source.field.getGenericType()); + Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, Tags.class); + Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, source.field.getName(), value, tags); parsingEngine.addTags(result,tags); return result; @@ -352,7 +364,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) { + Class type = makeRawTypeIfNecessary(fulltype); if (source.isFlag()) return true; @@ -393,7 +406,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { throw e; } catch (InvocationTargetException e) { throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)", - value, source.field.getName())); + value, source.field.getName())); } catch (Exception e) { throw new DynamicClassResolutionException(String.class, e); } @@ -405,7 +418,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { return result; } - + /** * A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer @@ -436,8 +449,9 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override @SuppressWarnings("unchecked") - public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) { - Class componentType; + public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) { + Class type = makeRawTypeIfNecessary(fulltype); + Type componentType; Object result; if( Collection.class.isAssignableFrom(type) ) { @@ -452,7 +466,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } componentType = getCollectionComponentType( source.field ); - ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType); + ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType)); Collection collection; try { @@ -481,7 +495,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { } else if( type.isArray() ) { componentType = type.getComponentType(); - ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType); + ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType)); // Assemble a collection of individual values used in this computation. Collection values = new ArrayList(); @@ -489,7 +503,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { for( ArgumentMatch value: match ) values.add(value); - result = Array.newInstance(componentType,values.size()); + result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size()); int i = 0; for( ArgumentMatch value: values ) { @@ -512,16 +526,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @throws IllegalArgumentException If more than one parameterized type is found on the field. */ @Override - protected Class getCollectionComponentType( Field field ) { - // If this is a parameterized collection, find the contained type. If blow up if more than one type exists. - if( field.getGenericType() instanceof ParameterizedType) { - ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); - if( parameterizedType.getActualTypeArguments().length > 1 ) - throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); - return (Class)parameterizedType.getActualTypeArguments()[0]; - } - else - return String.class; + protected Type getCollectionComponentType( Field field ) { + // If this is a parameterized collection, find the contained type. If blow up if more than one type exists. + if( field.getGenericType() instanceof ParameterizedType) { + ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); + if( parameterizedType.getActualTypeArguments().length > 1 ) + throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); + return parameterizedType.getActualTypeArguments()[0]; + } + else + return String.class; } } @@ -568,7 +582,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { throw new ReviewedStingException("No multiplexed ids available"); Map multiplexedMapping = new HashMap(); - Class componentType = getCollectionComponentType(source.field); + Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field)); ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType); for(Object id: multiplexedIds) { @@ -582,13 +596,13 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override - public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { + public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { if(multiplexedIds == null) throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first."); Map multiplexedMapping = new HashMap(); - Class componentType = getCollectionComponentType(source.field); + Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field)); for(Object id: multiplexedIds) { @@ -659,7 +673,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @throws IllegalArgumentException If more than one parameterized type is found on the field. */ @Override - protected Class getCollectionComponentType( Field field ) { + protected Type getCollectionComponentType( Field field ) { // Multiplex arguments must resolve to maps from which the clp should extract the second type. if( field.getGenericType() instanceof ParameterizedType) { ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index 035ce1cbc..ebdafc703 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; @@ -165,7 +164,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram { */ protected Collection getArgumentTypeDescriptors() { return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources), - new SAMFileReaderArgumentTypeDescriptor(engine), new SAMFileWriterArgumentTypeDescriptor(engine,System.out), new OutputStreamArgumentTypeDescriptor(engine,System.out) ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index 8bc97c886..8fef10cd6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; import java.lang.reflect.Constructor; +import java.lang.reflect.Type; /** * Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations. @@ -78,7 +79,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition definition = createDefaultArgumentDefinition(source); String fileName = getArgumentValue( definition, matches ); @@ -91,7 +92,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { engine.addOutput(stub); - Object result = createInstanceOfClass(type,stub); + Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub); // WARNING: Side effects required by engine! parsingEngine.addTags(result,getArgumentTags(matches)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java index f124c2302..8b3efd7ef 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileReaderArgumentTypeDescriptor.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import java.io.File; +import java.lang.reflect.Type; /** * Describe how to parse SAMFileReaders. @@ -59,7 +60,7 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 38640eda0..3fdb38b3d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; import java.lang.annotation.Annotation; +import java.lang.reflect.Type; import java.util.Arrays; import java.util.List; @@ -102,7 +103,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { // Extract all possible parameters that could be passed to a BAM file writer? ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); String writerFileName = getArgumentValue( bamArgumentDefinition, matches ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 615841f02..e9eed5339 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.OutputStream; +import java.lang.reflect.Type; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; @@ -124,7 +125,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @return Transform from the matches into the associated argument. */ @Override - public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { + public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) { ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source); // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. String writerFileName = getArgumentValue(defaultArgumentDefinition,matches); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java index a7955f1fb..575daa19d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -65,8 +65,8 @@ public class VariantsToTableNewRodStyle extends RodWalker { @Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true) public RodBinding variants; -// @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) -// public List> variantsList; + @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) + public List> variantsList; public void initialize() { out.println(Utils.join("\t", fieldsToTake)); @@ -135,8 +135,8 @@ public class VariantsToTableNewRodStyle extends RodWalker { if ( tracker == null ) // RodWalkers can make funky map calls return 0; -// for ( RodBinding binding : variantsList ) -// System.out.printf("VariantList binding %s tags=%s%n", binding, getToolkit().getTags(binding).getPositionalTags()); + for ( RodBinding binding : variantsList ) + System.out.printf("VariantList binding %s tags=%s%n", binding, binding.getTags().getPositionalTags()); if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { VariantContext vc = variants.getFirstValue(tracker, context.getLocation()); diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 92e339aa1..e5974e165 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; -import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; @@ -118,7 +117,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram { protected Collection getArgumentTypeDescriptors() { List typeDescriptors = new ArrayList(); typeDescriptors.add(new VCFWriterArgumentTypeDescriptor(GATKEngine,System.out,Collections.emptyList())); - typeDescriptors.add(new SAMFileReaderArgumentTypeDescriptor(GATKEngine)); typeDescriptors.add(new SAMFileWriterArgumentTypeDescriptor(GATKEngine,System.out)); typeDescriptors.add(new OutputStreamArgumentTypeDescriptor(GATKEngine,System.out)); return typeDescriptors; From 2d94037ad06e3cf2b7b3a78f700b52a58c280d82 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sat, 30 Jul 2011 02:05:22 -0400 Subject: [PATCH 060/186] Remove temporary index files (*.bai) some temporary index files were not being removed. --- .../sting/queue/qscripts/DataProcessingPipeline.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 1f4f79993..959d073c7 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -365,6 +365,7 @@ class DataProcessingPipeline extends QScript { } case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inBam) this.output = outBam this.metrics = metricsFile @@ -373,6 +374,7 @@ class DataProcessingPipeline extends QScript { } case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = inBams this.output = outBam this.analysisName = queueLogDir + outBam + ".joinBams" @@ -380,6 +382,7 @@ class DataProcessingPipeline extends QScript { } case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inSam) this.output = outBam this.sortOrder = sortOrderP @@ -399,6 +402,7 @@ class DataProcessingPipeline extends QScript { case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inBam) this.output = outBam this.RGID = readGroup.id From 7b07c4e04e2b089836524bc8522fe425c05502f2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 30 Jul 2011 15:34:11 -0400 Subject: [PATCH 061/186] RefMetaDataTracker now has get() methods accepting RodBindings RodBinding no longer duplicates the get() methods in RMDT. This is just an object now that connects the command line system to the RMDT. Updated programs to use new style Added UnitTests for the RodBinding accessors. --- .../sting/commandline/RodBinding.java | 34 +-------- .../gatk/refdata/RefMetaDataTracker.java | 76 ++++++++++++++----- .../VariantsToTableNewRodStyle.java | 2 +- .../refdata/RefMetaDataTrackerUnitTest.java | 29 ++++++- 4 files changed, 87 insertions(+), 54 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index b39e0d7ba..23acc2a78 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -54,41 +54,13 @@ public class RodBinding { public String getVariableName() { return variableName; } - + public Class getType() { + return type; + } public String getSource() { return source; } - // ------------------------------------------------------------------------------------------ - // - // - // Accessors should be kept in sync with RefMetaDataTracker - // - // - // ------------------------------------------------------------------------------------------ - - public List getValues(final RefMetaDataTracker tracker) { - return tracker.getValues(type, getVariableName()); - } - public List getValues(final RefMetaDataTracker tracker, final GenomeLoc onlyAtThisLoc) { - return tracker.getValues(type, getVariableName(), onlyAtThisLoc); - } - - public T getFirstValue(final RefMetaDataTracker tracker) { - return tracker.getFirstValue(type, getVariableName()); - } - public T getFirstValue(final RefMetaDataTracker tracker, final GenomeLoc onlyAtThisLoc) { - return tracker.getFirstValue(type, getVariableName(), onlyAtThisLoc); - } - - public boolean hasValues(final RefMetaDataTracker tracker) { - return tracker.hasValues(variableName); - } - - public List getValuesAsGATKFeatures(final RefMetaDataTracker tracker) { - return tracker.getValuesAsGATKFeatures(variableName); - } - public Tags getTags() { return tags; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index c47accb00..e51871498 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; @@ -91,45 +93,81 @@ public class RefMetaDataTracker { // // ------------------------------------------------------------------------------------------ - public List getValues(Class type) { + public List getValues(Class type) { return addValues(map.keySet(), type, new ArrayList(), null, false, false); } - public List getValues(Class type, final GenomeLoc onlyAtThisLoc) { + public List getValues(Class type, final GenomeLoc onlyAtThisLoc) { return addValues(map.keySet(), type, new ArrayList(), onlyAtThisLoc, true, false); } - public List getValues(Class type, final String name) { + public List getValues(Class type, final String name) { return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false); } - public List getValues(Class type, final String name, final GenomeLoc onlyAtThisLoc) { + public List getValues(Class type, final String name, final GenomeLoc onlyAtThisLoc) { return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false); } - public List getValues(Class type, final Collection names) { + public List getValues(Class type, final Collection names) { return addValues(names, type, new ArrayList(), null, false, false); } - public List getValues(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + public List getValues(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { return addValues(names, type, new ArrayList(), onlyAtThisLoc, true, false); } - public T getFirstValue(Class type) { + public T getFirstValue(Class type) { return safeGetFirst(getValues(type)); } - public T getFirstValue(Class type, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(Class type, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, onlyAtThisLoc)); } - public T getFirstValue(Class type, final String name) { + public T getFirstValue(Class type, final String name) { return safeGetFirst(getValues(type, name)); } - public T getFirstValue(Class type, final String name, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(Class type, final String name, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, name, onlyAtThisLoc)); } - public T getFirstValue(Class type, final Collection names) { + public T getFirstValue(Class type, final Collection names) { return safeGetFirst(getValues(type, names)); } - public T getFirstValue(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, names, onlyAtThisLoc)); } - final private T safeGetFirst(List l) { + // + // ROD binding accessors + // + public List getValues(RodBinding rodBinding) { + return getValues(rodBinding.getType(), rodBinding.getVariableName()); + } + public List getValues(RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + return getValues(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); + } + + public T getFirstValue(RodBinding rodBinding) { + return getFirstValue(rodBinding.getType(), rodBinding.getVariableName()); + } + public T getFirstValue(RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + return getFirstValue(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); + } + + public boolean hasValues(RodBinding rodBinding) { + return hasValues(rodBinding.getVariableName()); + } + + public List getValuesAsGATKFeatures(RodBinding rodBinding) { + return getValuesAsGATKFeatures(rodBinding.getVariableName()); + } + + /** + * Helper function for getFirst() operations that takes a list of and + * returns the first element, or null if no such element exists. + * + * TODO: determine specific behavior for l.size() > 1. Do we turn first or an error? + * TODO: right now we return the first. Should be clearer + * + * @param l + * @param + * @return + */ + final private T safeGetFirst(List l) { // todo: should we be warning people here? Throwing an error? return l.isEmpty() ? null : l.get(0); } @@ -216,7 +254,7 @@ public class RefMetaDataTracker { */ @Deprecated public List getValues(final String name) { - return getValues(name, Object.class); + return (List)(List)getValues(name, Feature.class); } /** @@ -229,7 +267,7 @@ public class RefMetaDataTracker { * Important: The list returned by this function is guaranteed not to be null, but may be empty! */ @Deprecated - public List getValues(final String name, final Class clazz) { + public List getValues(final String name, final Class clazz) { RODRecordList list = getTrackDataByName(name); if (list.isEmpty()) @@ -250,11 +288,11 @@ public class RefMetaDataTracker { * * @param name the name of the track * @param clazz the underlying type to return - * @param the type to parameterize on, matching the clazz argument + * @param the type to parameterize on, matching the clazz argument * @return a record of type T, or null if no record is present. */ @Deprecated - public T getFirstValue(final String name, final Class clazz) { + public T getFirstValue(final String name, final Class clazz) { RODRecordList objects = getTrackDataByName(name); if (objects.isEmpty()) return null; @@ -411,7 +449,7 @@ public class RefMetaDataTracker { addValues("xxx", VariantContext.class, contexts, rodList, curLocation, requireStartHere, takeFirstOnly); } - private List addValues(final Collection names, + private List addValues(final Collection names, final Class type, final List values, final GenomeLoc curLocation, @@ -425,7 +463,7 @@ public class RefMetaDataTracker { return values; } - private List addValues(final String name, + private List addValues(final String name, final Class type, final List values, final RODRecordList rodList, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java index 575daa19d..b8c6fd530 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java @@ -139,7 +139,7 @@ public class VariantsToTableNewRodStyle extends RodWalker { System.out.printf("VariantList binding %s tags=%s%n", binding, binding.getTags().getPositionalTags()); if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - VariantContext vc = variants.getFirstValue(tracker, context.getLocation()); + VariantContext vc = tracker.getFirstValue(variants, context.getLocation()); if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); out.println(Utils.join("\t", vals)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index 7ae1ed3be..28da4a8f0 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -30,6 +30,8 @@ import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; @@ -181,7 +183,7 @@ public class RefMetaDataTrackerUnitTest { testSimpleBindings("B", tracker, test.BValues); } - private void testSimpleBindings(String name, RefMetaDataTracker tracker, RODRecordList expected) { + private void testSimpleBindings(String name, RefMetaDataTracker tracker, RODRecordList expected) { List asValues = tracker.getValues(Feature.class, name); List asFeatures = tracker.getValuesAsGATKFeatures(name); @@ -207,8 +209,8 @@ public class RefMetaDataTrackerUnitTest { } @Test(enabled = true, dataProvider = "tests") - public void testGetters(MyTest test) { - logger.warn("Testing " + test + " for getFirst() methods"); + public void testGettersAsString(MyTest test) { + logger.warn("Testing " + test + " for get() methods"); RefMetaDataTracker tracker = test.makeTracker(); for ( String name : Arrays.asList("A+B", "A", "B") ) { @@ -226,6 +228,27 @@ public class RefMetaDataTrackerUnitTest { } } + @Test(enabled = true, dataProvider = "tests") + public void testGettersAsRodBindings(MyTest test) { + logger.warn("Testing " + test + " for get() methods as RodBindings"); + RefMetaDataTracker tracker = test.makeTracker(); + + for ( String nameAsString : Arrays.asList("A", "B") ) { + RodBinding binding = new RodBinding(Feature.class, nameAsString, "none", new Tags()); + List v1 = tracker.getValues(binding); + testGetter(nameAsString, v1, test.expected(nameAsString), true, tracker); + + List v2 = tracker.getValues(binding, locus); + testGetter(nameAsString, v2, startingHere(test.expected(nameAsString)), true, tracker); + + Feature v3 = tracker.getFirstValue(binding); + testGetter(nameAsString, Arrays.asList(v3), test.expected(nameAsString), false, tracker); + + Feature v4 = tracker.getFirstValue(binding, locus); + testGetter(nameAsString, Arrays.asList(v4), startingHere(test.expected(nameAsString)), false, tracker); + } + } + private List startingHere(List l) { List x = new ArrayList(); for ( GATKFeature f : l ) if ( f.getStart() == locus.getStart() ) x.add(f); From 4f8d8309607ecc2141d8af52a6f93147503a2b5e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sat, 30 Jul 2011 15:34:20 -0400 Subject: [PATCH 062/186] Updated to reflect new parse() function --- .../queue/util/ScalaCompoundArgumentTypeDescriptor.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala index 40a296022..58341a0a5 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/ScalaCompoundArgumentTypeDescriptor.scala @@ -4,6 +4,7 @@ import collection.JavaConversions._ import org.broadinstitute.sting.queue.QException import java.lang.Class import org.broadinstitute.sting.commandline.{ArgumentMatches, ArgumentSource, ArgumentTypeDescriptor, ParsingEngine} +import java.lang.reflect.Type /** * An ArgumentTypeDescriptor that can parse the scala collections. @@ -42,6 +43,10 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor { * @param argumentMatches The argument match strings that were found for this argument source. * @return The parsed object. */ + def parse(parsingEngine: ParsingEngine, source: ArgumentSource, typeType: Type, argumentMatches: ArgumentMatches) = { + parse(parsingEngine,source, makeRawTypeIfNecessary(typeType), argumentMatches) + } + def parse(parsingEngine: ParsingEngine, source: ArgumentSource, classType: Class[_], argumentMatches: ArgumentMatches) = { val componentType = ReflectionUtils.getCollectionType(source.field) val componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType) From f69bff5dd603168be251e8f3f951d00adfa1e39a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 1 Aug 2011 13:34:25 -0400 Subject: [PATCH 063/186] Commented out, because these fail the now removed dbSNP conversion. --- .../VariantContextIntegrationTest.java | 55 ++++++++++++------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index a344817a0..1a2285b22 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.HashMap; @@ -17,30 +18,42 @@ public class VariantContextIntegrationTest extends WalkerTest { " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; - static HashMap expectations = new HashMap(); - static { - expectations.put("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); + private static final class VCITTest extends TestDataProvider { + String args, md5; + + private VCITTest(final String args, final String md5) { + super(VCITTest.class); + this.args = args; + this.md5 = md5; + } } - @Test - public void testConversionSelection() { - for ( Map.Entry entry : expectations.entrySet() ) { - String extraArgs = entry.getKey(); - String md5 = entry.getValue(); + @DataProvider(name = "VCITTestData") + public Object[][] createVCITTestData() { + new VCITTest("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); + new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); + new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); + new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); + new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); - WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s", - 1, // just one output file - Arrays.asList(md5)); - executeTest("testDbSNPAndVCFConversions", spec); - } + // TODO : Eric, these are bad because the conversion fails + //new VCITTest("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); + //new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); + //new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); + //new VCITTest("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); + + return VCITTest.getTests(VCITTest.class); + } + + @Test(dataProvider = "VCITTestData") + public void testConversionSelection(VCITTest test) { + String extraArgs = test.args; + String md5 = test.md5; + + WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testDbSNPAndVCFConversions", spec); } @Test From 8b1adb8c9558aca14522859c04748c533977bb89 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 1 Aug 2011 13:41:09 -0400 Subject: [PATCH 064/186] Removed getVariantContext() code --- .../gatk/refdata/RefMetaDataTracker.java | 158 +--------------- .../walkers/annotator/VariantAnnotator.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 4 +- .../genomicannotator/GenomicAnnotator.java | 2 +- .../beagle/BeagleOutputToVCFWalker.java | 4 +- .../beagle/ProduceBeagleInputWalker.java | 4 +- .../VariantsToBeagleUnphasedWalker.java | 2 +- .../fasta/FastaAlternateReferenceWalker.java | 2 +- .../filters/VariantFiltrationWalker.java | 2 +- ...elGenotypeLikelihoodsCalculationModel.java | 2 +- ...NPGenotypeLikelihoodsCalculationModel.java | 2 +- .../walkers/genotyper/UGCallVariants.java | 4 +- .../indels/RealignerTargetCreator.java | 2 +- .../walkers/phasing/AnnotateMNPsWalker.java | 4 +- .../phasing/MergeAndMatchHaplotypes.java | 4 +- .../gatk/walkers/phasing/MergeMNPsWalker.java | 4 +- ...ergeSegregatingAlternateAllelesWalker.java | 4 +- .../walkers/phasing/PhaseByTransmission.java | 36 ++-- .../phasing/ReadBackedPhasingWalker.java | 4 +- .../walkers/qc/RodSystemValidationWalker.java | 2 +- .../validation/ValidationAmplicons.java | 4 +- .../varianteval/stratifications/Novelty.java | 2 +- .../varianteval/util/VariantEvalUtils.java | 3 +- .../ApplyRecalibration.java | 2 +- .../VariantDataManager.java | 2 +- .../VariantRecalibrator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../variantutils/RandomlySplitVariants.java | 2 +- .../walkers/variantutils/SelectVariants.java | 6 +- .../variantutils/ValidateVariants.java | 2 +- .../VariantValidationAssessor.java | 2 +- .../walkers/variantutils/VariantsToTable.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 2 +- .../RecalibrationWalkersIntegrationTest.java | 172 +++++++++++------- 37 files changed, 162 insertions(+), 297 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index e51871498..1436465ad 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -254,27 +254,7 @@ public class RefMetaDataTracker { */ @Deprecated public List getValues(final String name) { - return (List)(List)getValues(name, Feature.class); - } - - /** - * get all the reference meta data associated with a track name. - * @param name the name of the track we're looking for - * @param clazz the expected class of the elements bound to rod name - * @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a - * dbSNP RMD this will be a RodDbSNP, etc. - * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! - */ - @Deprecated - public List getValues(final String name, final Class clazz) { - RODRecordList list = getTrackDataByName(name); - - if (list.isEmpty()) - return Collections.emptyList(); - else { - return addValues(name, clazz, new ArrayList(), list, list.getLocation(), false, false); - } + return (List)(List)getValues(Feature.class, name); } @@ -313,142 +293,6 @@ public class RefMetaDataTracker { // // ------------------------------------------------------------------------------------------ - /** - * Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number - * of entries per ROD. - * The name of each VariantContext corresponds to the ROD name. - * - * @return variant context - */ - @Deprecated - public List getAllVariantContexts() { - return getAllVariantContexts(null, false, false); - } - - /** - * Returns all of the variant contexts that start at the current location - * - * @param curLocation - * @return - */ - @Deprecated - public List getAllVariantContexts(final GenomeLoc curLocation) { - return getAllVariantContexts(curLocation, true, false); - } - - /** - * Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only - * VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation - * must not be null, and only records whose start position is == to curLocation.getStart() will be returned. - * If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course, - * this single object must pass the allowed types and start here options if provided. Note that the result - * may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning - * the current location. - * - * The name of each VariantContext corresponds to the ROD name. - * - * - * @param curLocation location - * @param requireStartHere do we require the rod to start at this location? - * @param takeFirstOnly do we take the first rod only? - * @return variant context - */ - @Deprecated - public List getAllVariantContexts(final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly) { - List contexts = new ArrayList(); - - for ( RODRecordList rodList : getBoundRodTracks() ) { - addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly); - } - - return contexts; - } - - /** - * Gets the variant contexts associated with track name name - * - * see getVariantContexts for more information. - * - * - * @param name name - * @param curLocation location - * @param requireStartHere do we require the rod to start at this location? - * @param takeFirstOnly do we take the first rod only? - * @return variant context - */ - @Deprecated - public List getVariantContexts(final String name, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly) { - return getVariantContexts(Arrays.asList(name), curLocation, requireStartHere, takeFirstOnly); - } - - @Deprecated - public List getVariantContexts(final Collection names, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly) { - List contexts = new ArrayList(); - - for ( String name : names ) { - RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match - addVariantContexts(contexts, rodList, curLocation, requireStartHere, takeFirstOnly ); - } - - return contexts; - } - - /** - * Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not. - * see getVariantContexts for more information. - * - * - * @param name name - * @param curLocation location - * @param requireStartHere do we require the rod to start at this location? - * @return variant context - */ - @Deprecated - public VariantContext getVariantContext(final String name, - final GenomeLoc curLocation, - final boolean requireStartHere) { - List contexts = getVariantContexts(name, curLocation, requireStartHere, false ); - - if ( contexts.size() > 1 ) - throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation); - else if ( contexts.size() == 0 ) - return null; - else - return contexts.iterator().next(); - } - - /** - * Very simple accessor that gets the first (and only!) VC associated with name at the current location, or - * null if there's no binding here. - * - * - * @param name - * @param curLocation - * @return - */ - @Deprecated - public VariantContext getVariantContext(final String name, - final GenomeLoc curLocation) { - return getVariantContext(name, curLocation, true); - } - - @Deprecated - private void addVariantContexts(final List contexts, - final RODRecordList rodList, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { - addValues("xxx", VariantContext.class, contexts, rodList, curLocation, requireStartHere, takeFirstOnly); - } - private List addValues(final Collection names, final Class type, final List values, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 932317700..1744cc9e8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -202,7 +202,7 @@ public class VariantAnnotator extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); + Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); if ( VCs.size() == 0 ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 9dc0bbfe6..583362ae4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -203,7 +203,7 @@ public class VariantAnnotatorEngine { infoAnnotations.put(VariantContext.ID_KEY, rsID); } else { boolean overlapsComp = false; - for ( VariantContext comp : tracker.getVariantContexts(dbSet.getKey(), ref.getLocus(), false, false) ) { + for ( VariantContext comp : tracker.getValues(VariantContext.class, dbSet.getKey()) ) { if ( !comp.isFiltered() ) { overlapsComp = true; break; @@ -216,7 +216,7 @@ public class VariantAnnotatorEngine { private void annotateExpressions(RefMetaDataTracker tracker, ReferenceContext ref, Map infoAnnotations) { for ( VAExpression expression : requestedExpressions ) { - Collection VCs = tracker.getVariantContexts(expression.bindingName, ref.getLocus(), false, true); + Collection VCs = tracker.getValues(VariantContext.class, expression.bindingName); if ( VCs.size() == 0 ) continue; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java index 78057849c..a82041af2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/GenomicAnnotator.java @@ -244,7 +244,7 @@ public class GenomicAnnotator extends RodWalker implements Tre return 0; Set results = new LinkedHashSet(); - for (VariantContext vc : tracker.getVariantContexts("variant", context.getLocation(), true, false)) { + for (VariantContext vc : tracker.getValues(VariantContext.class, "variant", context.getLocation())) { if ( (vc.isFiltered() && IGNORE_FILTERED_SITES) || (vc.isVariant() && !vc.isBiallelic()) ) { results.add(vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 19eafc872..97a4b6a8f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -119,9 +119,9 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; GenomeLoc loc = context.getLocation(); - VariantContext vc_input = tracker.getVariantContext(INPUT_ROD_NAME, loc, true); + VariantContext vc_input = tracker.getFirstValue(VariantContext.class, INPUT_ROD_NAME, loc); - VariantContext vc_comp = tracker.getVariantContext(COMP_ROD_NAME, loc, true); + VariantContext vc_comp = tracker.getFirstValue(VariantContext.class, COMP_ROD_NAME, loc); if ( vc_input == null ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 353ebb82a..354b032f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -121,8 +121,8 @@ public class ProduceBeagleInputWalker extends RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext variant_eval = tracker.getVariantContext(ROD_NAME, loc, true); - VariantContext validation_eval = tracker.getVariantContext(VALIDATION_ROD_NAME, loc, true); + VariantContext variant_eval = tracker.getFirstValue(VariantContext.class, ROD_NAME, loc); + VariantContext validation_eval = tracker.getFirstValue(VariantContext.class, VALIDATION_ROD_NAME, loc); if ( goodSite(variant_eval,validation_eval) ) { if ( useValidation(validation_eval, ref) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index ee3dfb1df..b95f3097d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -102,7 +102,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext vc = tracker.getVariantContext(ROD_NAME, loc, true); + VariantContext vc = tracker.getFirstValue(VariantContext.class, ROD_NAME, loc); if ( ProduceBeagleInputWalker.canBeOutputToBeagle(vc) ) { // do we want to hold back this site? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 665ac539c..2865c2380 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -57,7 +57,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { String refBase = String.valueOf((char)ref.getBase()); - Collection vcs = tracker.getAllVariantContexts(); + Collection vcs = tracker.getValues(VariantContext.class); // Check to see if we have a called snp for ( VariantContext vc : vcs ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 61991db2d..9e89944a5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -149,7 +149,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts(INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); + Collection VCs = tracker.getValues(VariantContext.class, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation()); // is there a SNP mask present? boolean hasMask = tracker.getValues("mask").size() > 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 54bb888c8..897e1a668 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -321,7 +321,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap.clear(); if (getAlleleListFromVCF) { - for( final VariantContext vc_input : tracker.getVariantContexts("alleles", ref.getLocus(), false, false) ) { + for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles") ) { if( vc_input != null && allowableTypes.contains(vc_input.getType()) && ref.getLocus().getStart() == vc_input.getStart()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 4f784d37a..865bdfafe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -63,7 +63,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC VariantContext vc = null; // search for usable record - for( final VariantContext vc_input : tracker.getVariantContexts("alleles", ref.getLocus(), true, false) ) { + for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles", ref.getLocus()) ) { if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) { if ( vc == null ) { vc = vc_input; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 1c5d55225..0ddfe609b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -96,8 +96,8 @@ public class UGCallVariants extends RodWalker { List VCs = new ArrayList(); for ( String name : trackNames ) { - Collection vc = tracker.getVariantContexts(name, context.getLocation(), true, true); - VCs.addAll(vc); + VariantContext vc = tracker.getFirstValue(VariantContext.class, name, context.getLocation()); + VCs.add(vc); } VariantContext mergedVC = mergeVCsWithGLs(VCs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 38a1dcb8d..7d805f092 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -110,7 +110,7 @@ public class RealignerTargetCreator extends RodWalker { GenomeLoc curLocus = ref.getLocus(); clearOldLocusFeatures(curLocus); - boolean requireStartHere = false; // see EVERY site of the MNP - boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(rodName, context.getLocation(), requireStartHere, takeFirstOnly)) { + for (VariantContext vc : tracker.getValues(VariantContext.class, rodName)) { GenomeLoc vcLoc = VariantContextUtils.getLocation(locParser, vc); boolean atStartOfVc = curLocus.getStart() == vcLoc.getStart(); boolean atEndOfVc = curLocus.getStart() == vcLoc.getStop(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java index ae7782434..315ae36d8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -44,8 +44,8 @@ public class MergeAndMatchHaplotypes extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection pbts = tracker.getVariantContexts("pbt", ref.getLocus(), true, true); - Collection rbps = tracker.getVariantContexts("rbp", ref.getLocus(), true, true); + Collection pbts = tracker.getValues(VariantContext.class, "pbt", ref.getLocus()); + Collection rbps = tracker.getValues(VariantContext.class, "rbp", ref.getLocus()); VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index 6e328c07e..de62f9652 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -98,9 +98,7 @@ public class MergeMNPsWalker extends RodWalker { if (tracker == null) return null; - boolean requireStartHere = true; // only see each VariantContext once - boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(rodName, context.getLocation(), requireStartHere, takeFirstOnly)) + for (VariantContext vc : tracker.getValues(VariantContext.class, rodName, context.getLocation())) writeVCF(vc); return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index c747e35d0..bf26b327d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -135,9 +135,7 @@ public class MergeSegregatingAlternateAllelesWalker extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection vcs = tracker.getVariantContexts(ROD_NAME, context.getLocation(), true, true); + VariantContext vc = tracker.getFirstValue(VariantContext.class, ROD_NAME, context.getLocation()); - for (VariantContext vc : vcs) { - Map genotypeMap = vc.getGenotypes(); + Map genotypeMap = vc.getGenotypes(); - for (Trio trio : trios) { - Genotype mother = vc.getGenotype(trio.getMother()); - Genotype father = vc.getGenotype(trio.getFather()); - Genotype child = vc.getGenotype(trio.getChild()); + for (Trio trio : trios) { + Genotype mother = vc.getGenotype(trio.getMother()); + Genotype father = vc.getGenotype(trio.getFather()); + Genotype child = vc.getGenotype(trio.getChild()); - ArrayList trioGenotypes = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child); + ArrayList trioGenotypes = phaseTrioGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child); - Genotype phasedMother = trioGenotypes.get(0); - Genotype phasedFather = trioGenotypes.get(1); - Genotype phasedChild = trioGenotypes.get(2); + Genotype phasedMother = trioGenotypes.get(0); + Genotype phasedFather = trioGenotypes.get(1); + Genotype phasedChild = trioGenotypes.get(2); - genotypeMap.put(phasedMother.getSampleName(), phasedMother); - genotypeMap.put(phasedFather.getSampleName(), phasedFather); - genotypeMap.put(phasedChild.getSampleName(), phasedChild); - } - - VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap); - - vcfWriter.add(newvc, ref.getBase()); + genotypeMap.put(phasedMother.getSampleName(), phasedMother); + genotypeMap.put(phasedFather.getSampleName(), phasedFather); + genotypeMap.put(phasedChild.getSampleName(), phasedChild); } + + VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap); + + vcfWriter.add(newvc, ref.getBase()); } return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 165cef477..dd7c68247 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -204,9 +204,7 @@ public class ReadBackedPhasingWalker extends RodWalker unprocessedList = new LinkedList(); - boolean requireStartHere = true; // only see each VariantContext once - boolean takeFirstOnly = false; // take as many entries as the VCF file has - for (VariantContext vc : tracker.getVariantContexts(rodName, context.getLocation(), requireStartHere, takeFirstOnly)) { + for (VariantContext vc : tracker.getValues(VariantContext.class, rodName, context.getLocation())) { if (samplesToPhase != null) vc = reduceVCToSamples(vc, samplesToPhase); if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java index 608d5925a..22b145911 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -84,7 +84,7 @@ public class RodSystemValidationWalker extends RodWalker { // if the argument was set, check for equivalence if (allRecordsVariantContextEquivalent && tracker != null) { - Collection col = tracker.getAllVariantContexts(); + Collection col = tracker.getValues(VariantContext.class); VariantContext con = null; for (VariantContext contextInList : col) if (con == null) con = contextInList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 3d09ef785..69bb78b1e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -138,8 +138,8 @@ public class ValidationAmplicons extends RodWalker { // step 3 (or 1 if not new): // build up the sequence - VariantContext mask = tracker.getVariantContext("MaskAlleles",ref.getLocus()); - VariantContext validate = tracker.getVariantContext("ValidateAlleles",ref.getLocus()); + VariantContext mask = tracker.getFirstValue(VariantContext.class, "MaskAlleles",ref.getLocus()); + VariantContext validate = tracker.getFirstValue(VariantContext.class, "ValidateAlleles",ref.getLocus()); if ( mask == null && validate == null ) { if ( indelCounter > 0 ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java index 5e98350ed..d2e4392a5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java @@ -25,7 +25,7 @@ public class Novelty extends VariantStratifier implements StandardStratification public ArrayList getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if (tracker != null && eval != null) { for (final String knownName : knownNames) { - final Collection knownComps = tracker.getVariantContexts(knownName, ref.getLocus(), true, false); + final Collection knownComps = tracker.getValues(VariantContext.class, knownName, ref.getLocus()); for ( final VariantContext c : knownComps ) { // loop over sites, looking for something that matches the type eval if ( eval.getType() == c.getType() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 285e75ed8..79cd89ca3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -336,8 +336,7 @@ public class VariantEvalUtils { for (String trackName : trackNames) { HashMap vcs = new HashMap(); - Collection contexts = tracker == null ? null : tracker.getVariantContexts(trackName, ref.getLocus(), true, true); - VariantContext vc = contexts != null && contexts.size() == 1 ? contexts.iterator().next() : null; + VariantContext vc = tracker == null ? null : tracker.getFirstValue(VariantContext.class, trackName, ref.getLocus()); // First, filter the VariantContext to represent only the samples for evaluation if (vc != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index b195256d8..1415db87c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -168,7 +168,7 @@ public class ApplyRecalibration extends RodWalker { return 1; } - for( VariantContext vc : tracker.getVariantContexts(inputNames, context.getLocation(), true, false) ) { + for( VariantContext vc : tracker.getValues(VariantContext.class, inputNames, context.getLocation()) ) { if( vc != null ) { if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { String filterString = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 7f32882f4..e7f74de0d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -258,7 +258,7 @@ public class VariantDataManager { datum.consensusCount = 0; for( final TrainingSet trainingSet : trainingSets ) { - for( final VariantContext trainVC : tracker.getVariantContexts(trainingSet.name, context.getLocation(), false, false ) ) { + for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name) ) { if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 497a02baf..7bd7ea46d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -163,7 +163,7 @@ public class VariantRecalibrator extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getAllVariantContexts(context.getLocation(), true, false); + Collection vcs = tracker.getValues(VariantContext.class, context.getLocation()); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index a55a53ff0..2a5a4e97c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -85,7 +85,7 @@ public class FilterLiftedVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); + Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); for ( VariantContext vc : VCs ) filterAndWrite(ref.getBases(), vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 01c7ddc91..e7e21d256 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -68,7 +68,7 @@ public class LeftAlignVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); + Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); int changedSites = 0; for ( VariantContext vc : VCs ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 38ac1e013..5e85d03cb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -143,7 +143,7 @@ public class LiftoverVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); + Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); for ( VariantContext vc : VCs ) convertAndWrite(vc, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index de194e93f..b3926bc34 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -97,7 +97,7 @@ public class RandomlySplitVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getVariantContexts(INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation(), true, false); + Collection vcs = tracker.getValues(VariantContext.class, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation()); for ( VariantContext vc : vcs ) { int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); if ( random < iFraction ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 3bc598e2d..dc24287e9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -308,7 +308,7 @@ public class SelectVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getVariantContexts(variantRodName, context.getLocation(), true, false); + Collection vcs = tracker.getValues(VariantContext.class, variantRodName, context.getLocation()); if ( vcs == null || vcs.size() == 0) { return 0; @@ -336,12 +336,12 @@ public class SelectVariants extends RodWalker { break; } if (DISCORDANCE_ONLY) { - Collection compVCs = tracker.getVariantContexts(discordanceRodName, context.getLocation(), true, false); + Collection compVCs = tracker.getValues(VariantContext.class, discordanceRodName, context.getLocation()); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { - Collection compVCs = tracker.getVariantContexts(concordanceRodName, context.getLocation(), true, false); + Collection compVCs = tracker.getValues(VariantContext.class, concordanceRodName, context.getLocation()); if (!isConcordant(vc, compVCs)) return 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 3bf8bd9e9..6655d26dc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -86,7 +86,7 @@ public class ValidateVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getVariantContexts("variant", context.getLocation(), true, false); + Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); for ( VariantContext vc : VCs ) validate(vc, tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index e8a012e99..245ed9edd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -93,7 +93,7 @@ public class VariantValidationAssessor extends RodWalker { return 0; if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - Collection vcs = tracker.getAllVariantContexts(context.getLocation()); + Collection vcs = tracker.getValues(VariantContext.class, context.getLocation()); for ( VariantContext vc : vcs) { if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 170daf6cc..39822d7a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -162,7 +162,7 @@ public class VariantsToVCF extends RodWalker { } // for everything else, we can just convert to VariantContext - return tracker.getVariantContexts(INPUT_ROD_NAME, ref.getLocus(), true, false); + return tracker.getValues(VariantContext.class, INPUT_ROD_NAME, ref.getLocus()); } private DbSNPFeature getDbsnpFeature(String rsID) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 129161da3..0992dbe31 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.HashMap; @@ -15,75 +16,106 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { static HashMap paramsFilesNoReadGroupTest = new HashMap(); static HashMap paramsFilesSolidIndels = new HashMap(); - @Test - public void testCountCovariates1() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" ); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" ); + private static final class CCTest extends TestDataProvider { + String file, md5; - for ( String parallelism : Arrays.asList("", " -nt 4")) { - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + - " -T CountCovariates" + - " -I " + bam + - ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) - ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + - " -cov ReadGroupCovariate" + - " -cov QualityScoreCovariate" + - " -cov CycleCovariate" + - " -cov DinucCovariate" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s" + parallelism, - 1, // just one output file - Arrays.asList(md5)); - List result = executeTest("testCountCovariates1" + parallelism, spec).getFirst(); - paramsFiles.put(bam, result.get(0).getAbsolutePath()); - } + private CCTest(final String file, final String md5) { + super(CCTest.class); + this.file = file; + this.md5 = md5; } } - - @Test - public void testTableRecalibrator1() { - HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" ); - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - String paramsFile = paramsFiles.get(bam); - System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); - if ( paramsFile != null ) { - WalkerTestSpec spec = new WalkerTestSpec( - "-R " + b36KGReference + - " -T TableRecalibration" + - " -I " + bam + - ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) + @DataProvider(name = "cctestdata") + public Object[][] createCCTestData() { + + new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); + new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", ""); + new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" ); + new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "" ); + return CCTest.getTests(CCTest.class); + } + + @Test(dataProvider = "cctestdata") + public void testCountCovariates1(CCTest test) { + testCC(test, ""); + } + + @Test(dataProvider = "cctestdata") + public void testCountCovariates4(CCTest test) { + testCC(test, " -nt 4"); + } + + private final void testCC(CCTest test, String parallelism) { + String bam = test.file; + String md5 = test.md5; + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + + " -T CountCovariates" + + " -I " + bam + + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) + ? " -L 1:10,800,000-10,810,000" : " -L 1:10,000,000-10,200,000" ) + + " -cov ReadGroupCovariate" + + " -cov QualityScoreCovariate" + + " -cov CycleCovariate" + + " -cov DinucCovariate" + + " --solid_recal_mode SET_Q_ZERO" + + " -recalFile %s" + parallelism, + 1, // just one output file + Arrays.asList(md5)); + List result = executeTest("testCountCovariates1" + parallelism, spec).getFirst(); + paramsFiles.put(bam, result.get(0).getAbsolutePath()); + } + + + private static final class TRTest extends TestDataProvider { + String file, md5; + + private TRTest(final String file, final String md5) { + super(TRTest.class); + this.file = file; + this.md5 = md5; + } + } + + @DataProvider(name = "trtestdata") + public Object[][] createTRTestData() { + new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); + new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", ""); + new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" ); + new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "" ); + return TRTest.getTests(TRTest.class); + } + + @Test(dataProvider = "trtestdata", dependsOnMethods = "testCountCovariates1") + public void testTableRecalibrator1(TRTest test) { + String bam = test.file; + String md5 = test.md5; + String paramsFile = paramsFiles.get(bam); + System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile); + if ( paramsFile != null ) { + WalkerTestSpec spec = new WalkerTestSpec( + "-R " + b36KGReference + + " -T TableRecalibration" + + " -I " + bam + + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) ? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) + - " -o %s" + - " --no_pg_tag" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile " + paramsFile, - 1, // just one output file - Arrays.asList(md5)); - executeTest("testTableRecalibrator1", spec); - } + " -o %s" + + " --no_pg_tag" + + " --solid_recal_mode SET_Q_ZERO" + + " -recalFile " + paramsFile, + 1, // just one output file + Arrays.asList(md5)); + executeTest("testTableRecalibrator1", spec); } } @Test public void testCountCovariatesUseOriginalQuals() { HashMap e = new HashMap(); - e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "3404965ec4fa99873fe6a44521944fd5"); + e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -97,7 +129,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod", + " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf", 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -107,7 +139,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -136,7 +168,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -144,7 +176,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -162,7 +194,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -190,7 +222,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -214,7 +246,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -238,7 +270,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -249,7 +281,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -266,7 +298,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -274,7 +306,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + @@ -292,7 +324,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); From 184030dd562059fcf9c390336c76a3badc552463 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 1 Aug 2011 15:21:16 -0400 Subject: [PATCH 065/186] RefMetaDataTracker no longer automagically converts inputs to VariantContexts This was no longer working properly given that DBSNP indels needed to be moved around. The adaptor system is being refactored and you will need to convert files from X -> VCF for many tools to work. --- .../gatk/refdata/RefMetaDataTracker.java | 49 +++++-------------- 1 file changed, 12 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 1436465ad..7ee560d1d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -54,37 +54,12 @@ public class RefMetaDataTracker { else { map = new HashMap(allBindings.size()); for ( RODRecordList rod : allBindings ) { - //logger.debug(String.format("Binding %s to %s", name, rod)); if ( rod != null ) - map.put(canonicalName(rod.getName()), maybeConvertToVariantContext(rod)); + map.put(canonicalName(rod.getName()), rod); } } } - /** - * A private converter that transforms a RODRecordList of objects of type X into - * a list of VariantContexts, if possible. - * - * TODO: should be removed when Features like dbsnp and hapmap produce VCs directly - * - * @param bindings - * @return - */ - private final RODRecordList maybeConvertToVariantContext(RODRecordList bindings) { - List values = new ArrayList(bindings.size()); - - for ( GATKFeature rec : bindings ) { - if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { - final VariantContext vc = VariantContextAdaptors.toVariantContext(bindings.getName(), rec.getUnderlyingObject(), ref); - if ( vc != null ) // it's possible that the conversion failed, but we continue along anyway - values.add(new GATKFeature.TribbleGATKFeature(ref.getGenomeLocParser(), vc, rec.getName())); - } else - values.add(rec); - } - - return new RODRecordListImpl(bindings.getName(), values, bindings.getLocation()); - } - // ------------------------------------------------------------------------------------------ // // @@ -294,11 +269,11 @@ public class RefMetaDataTracker { // ------------------------------------------------------------------------------------------ private List addValues(final Collection names, - final Class type, - final List values, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { + final Class type, + final List values, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); @@ -308,12 +283,12 @@ public class RefMetaDataTracker { } private List addValues(final String name, - final Class type, - final List values, - final RODRecordList rodList, - final GenomeLoc curLocation, - final boolean requireStartHere, - final boolean takeFirstOnly ) { + final Class type, + final List values, + final RODRecordList rodList, + final GenomeLoc curLocation, + final boolean requireStartHere, + final boolean takeFirstOnly ) { for ( GATKFeature rec : rodList ) { if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing Object obj = rec.getUnderlyingObject(); From 5626199bb69f7d9788b5112922d88d845dda1721 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 2 Aug 2011 10:14:21 -0400 Subject: [PATCH 067/186] The Unified Genotyper now does NOT emit SLOD/SB by default; to compute SB use --computeSLOD --- .../genotyper/UnifiedArgumentCollection.java | 6 +- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 12 ++-- .../UnifiedGenotyperIntegrationTest.java | 61 ++++++++----------- 4 files changed, 34 insertions(+), 47 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 2b25df4aa..52bf3f715 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -58,8 +58,8 @@ public class UnifiedArgumentCollection { @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false) public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; - @Argument(fullName = "noSLOD", shortName = "nsl", doc = "If provided, we will not calculate the SLOD", required = false) - public boolean NO_SLOD = false; + @Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false) + public boolean COMPUTE_SLOD = false; // control the error modes @@ -154,7 +154,7 @@ public class UnifiedArgumentCollection { uac.PCR_error = PCR_error; uac.GenotypingMode = GenotypingMode; uac.OutputMode = OutputMode; - uac.NO_SLOD = NO_SLOD; + uac.COMPUTE_SLOD = COMPUTE_SLOD; uac.ASSUME_SINGLE_SAMPLE = ASSUME_SINGLE_SAMPLE; uac.STANDARD_CONFIDENCE_FOR_CALLING = STANDARD_CONFIDENCE_FOR_CALLING; uac.STANDARD_CONFIDENCE_FOR_EMITTING = STANDARD_CONFIDENCE_FOR_EMITTING; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 2a0338bca..c673f7b3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -144,7 +144,7 @@ public class UnifiedGenotyper extends LocusWalker e = new HashMap(); - e.put( "--min_base_quality_score 26", "5043c9a101e691602eb7a3f9704bdf20" ); - e.put( "--min_mapping_quality_score 26", "71a833eb8fd93ee62ae0d5a430f27940" ); - e.put( "--p_nonref_model GRID_SEARCH", "ddf443e9dcadef367476b26b4d52c134" ); + e.put( "--min_base_quality_score 26", "6d3aa9f783ca63f37c952f83eeda593c" ); + e.put( "--min_mapping_quality_score 26", "51bfdf777123bf49de5d92ffde5c74e7" ); + e.put( "--p_nonref_model GRID_SEARCH", "333328ab2c8da2875fade599e80a271f" ); + e.put( "--computeSLOD", "226caa28a4fa9fe34f3beb8a23f3d53d" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -153,9 +154,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameter() { HashMap e = new HashMap(); - e.put( "-sites_only", "eaad6ceb71ab94290650a70bea5ab951" ); - e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "05bf7db8a3d19ef4a3d14772c90b732f" ); - e.put( "--output_mode EMIT_ALL_SITES", "e4b86740468d7369f0156550855586c7" ); + e.put( "-sites_only", "5f659dee408710d3709ed72005cd863a" ); + e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "55d09bf13149bddc06cc36be0801507b" ); + e.put( "--output_mode EMIT_ALL_SITES", "727f49dcb2439b18446829efc3b1561c" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -169,12 +170,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("71a833eb8fd93ee62ae0d5a430f27940")); + Arrays.asList("51bfdf777123bf49de5d92ffde5c74e7")); executeTest("test confidence 1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("79968844dc3ddecb97748c1acf2984c7")); + Arrays.asList("c67c285e70fd4457c9f9ce7bd878ddca")); executeTest("test confidence 2", spec2); } @@ -186,8 +187,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "4e878664f61d2d800146d3762303fde1" ); - e.put( 1.0 / 1850, "9204caec095ff5e63ca21a10b6fab453" ); + e.put( 0.01, "7ecc564d4db97d5932cef2e558550ed2" ); + e.put( 1.0 / 1850, "aa9e101bb9f9e111fe292fec467d915a" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -211,7 +212,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); + Arrays.asList("2efd686186b2c5129be4cf89274a24dd")); executeTest(String.format("test multiple technologies"), spec); } @@ -230,25 +231,11 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("62d0f6d9de344ce68ce121c13b1e78b1")); + Arrays.asList("2892d35331fe9fc141ba19269ec7caed")); executeTest(String.format("test calling with BAQ"), spec); } - @Test - public void testCallingWithBAQOff() { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + - " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + - " -o %s" + - " -L 1:10,000,000-10,100,000" + - " -baq OFF", - 1, - Arrays.asList("1a58ec52df545f946f80cc16c5736a91")); - - executeTest(String.format("test calling with BAQ OFF"), spec); - } - // -------------------------------------------------------------------------------------------------------------- // // testing indel caller @@ -263,7 +250,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("631ae1f1eb6bc4c1a4136b8495250536")); + Arrays.asList("8c2afb4289ed44521933d1a74c8d6c7f")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("fd556585c79e2b892a5976668f45aa43")); + Arrays.asList("b6fb70590a10e1c27fb611732916f27d")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -291,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("9cd56feedd2787919e571383889fde70")); + Arrays.asList("61642502bd08cc03cdaaeb83a5426b46")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -301,14 +288,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("315e1b78d7a403d7fcbcf0caa8c496b8")); + Arrays.asList("69b0b3f089c80b9864294d838a061336")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("cf89e0c54f14482a23c105b73a333d8a")); + Arrays.asList("c90174cfd7dd68bdef36fe2c60145e10")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } From 2c5e526eb7d9a4bf30d9fd0a715874e1517736e0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 2 Aug 2011 10:34:46 -0400 Subject: [PATCH 068/186] Don't use the mismatch fraction by default in the RealignerTargetCreator (since it's only useful when using SW in the indel realigner). Also, no more use of -D but instead move over to using VCFs. One integration test is temporarily commented out while I wait for a VCF file to get fixed. --- .../sting/gatk/walkers/indels/RealignerTargetCreator.java | 2 +- .../indels/RealignerTargetCreatorIntegrationTest.java | 6 +++--- .../indels/RealignerTargetCreatorPerformanceTest.java | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 488e37f26..6453ce8de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -64,7 +64,7 @@ public class RealignerTargetCreator extends RodWalker Date: Tue, 2 Aug 2011 10:39:50 -0400 Subject: [PATCH 069/186] No more use of -D in the integration tests but instead stick with VCFs only. Since all of these tests were duplicated (one each for dbSNP format and for VCF), we don't actually lose coverage in the integration tests. --- .../indels/IndelRealignerIntegrationTest.java | 19 ------------------- .../indels/IndelRealignerPerformanceTest.java | 4 ++-- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 2676f7067..19dc99682 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -32,13 +32,6 @@ public class IndelRealignerIntegrationTest extends WalkerTest { 1, Arrays.asList(base_md5_with_SW_or_VCF)); executeTest("test realigner defaults with VCF", spec2); - - WalkerTestSpec spec3 = new WalkerTestSpec( - baseCommand + "-D " + GATKDataLocation + "dbsnp_129_b36.rod", - 1, - Arrays.asList(base_md5)); - executeTest("realigner defaults with dbsnp", spec3); - } @Test @@ -48,12 +41,6 @@ public class IndelRealignerIntegrationTest extends WalkerTest { 1, Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888")); executeTest("realigner known indels only from VCF", spec1); - - WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod", - 1, - Arrays.asList("05a114623c126b0398fbc1703437461e")); - executeTest("realigner known indels only from dbsnp", spec2); } @Test @@ -63,12 +50,6 @@ public class IndelRealignerIntegrationTest extends WalkerTest { 1, Arrays.asList(base_md5_with_SW_or_VCF)); executeTest("realigner use SW from VCF", spec1); - - WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel USE_SW -D " + GATKDataLocation + "dbsnp_129_b36.rod", - 1, - Arrays.asList(base_md5_with_SW_or_VCF)); - executeTest("realigner use SW from dbsnp", spec2); } @Test diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java index fd5ad0b22..e8b5033cf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java @@ -30,7 +30,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-5,650,000" + @@ -45,7 +45,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L chr1:1-150,000,000" + From b9d0d2af223ea5537fac2d5af6d4ecf79702487a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 2 Aug 2011 12:39:11 -0400 Subject: [PATCH 071/186] Adding back temporarily removed integration test now that the file permissions have been fixed. --- .../walkers/indels/RealignerTargetCreatorIntegrationTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index cc67c354a..f5ed69476 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -19,8 +19,8 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, - Arrays.asList("f23ba17ee0f9573dd307708175d90cd2")); - //executeTest("test dbsnp", spec2); + Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); + executeTest("test dbsnp", spec2); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s", From 65c5d55b724bffd1d895e9cadd7d4acfa1d276dd Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 2 Aug 2011 12:48:36 -0400 Subject: [PATCH 072/186] Not sure how I missed these. These lines are now superfluous. --- .../sting/gatk/walkers/genotyper/UGCallVariants.java | 1 - 1 file changed, 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index 68d8f9b54..a3b9f379e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -62,7 +62,6 @@ public class UGCallVariants extends RodWalker { private Set trackNames = new HashSet(); public void initialize() { - UAC.NO_SLOD = true; for ( ReferenceOrderedDataSource d : getToolkit().getRodDataSources() ) { if ( d.getName().startsWith("variant") ) From 821bbfa9e0eb5a9d975277bfb011c6afcec6a673 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 2 Aug 2011 13:17:20 -0400 Subject: [PATCH 073/186] Bug fixes and enhancements to run whole-genome indel VQSR, removed old chr20-only code and cleanup --- .../gatk/walkers/variantrecalibration/VariantDataManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 67d54a408..7426a7726 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -258,7 +258,7 @@ public class VariantDataManager { datum.consensusCount = 0; for( final TrainingSet trainingSet : trainingSets ) { - for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), false, false ) ) { + for( final VariantContext trainVC : tracker.getVariantContexts( ref, trainingSet.name, null, context.getLocation(), true, false ) ) { if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { From 38e4ae4176a9a8ce5b6c359b01ca63c08b21c236 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 2 Aug 2011 13:30:38 -0400 Subject: [PATCH 074/186] minor update to comment in UG --- .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index a10897172..61892a8c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -428,7 +428,7 @@ public class UnifiedGenotyperEngine { myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes); if ( annotationEngine != null ) { - // first off, we want to use the *unfiltered* and *unBAQed* context for the annotations + // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations ReadBackedPileup pileup = null; if (rawContext.hasExtendedEventPileup()) pileup = rawContext.getExtendedEventPileup(); From c0653514b3114324947a20e0b86ed7947c025e91 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 2 Aug 2011 13:34:48 -0400 Subject: [PATCH 075/186] minor update to comment in UG --- .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index d1096e25e..99666bba6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -247,7 +247,7 @@ public class UnifiedGenotyperEngine { } if ( annotationEngine != null ) { - // we want to use the *unfiltered* and *unBAQed* context for the annotations + // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations ReadBackedPileup pileup = null; if (rawContext.hasExtendedEventPileup()) pileup = rawContext.getExtendedEventPileup(); From a366f9a18d4790879e4996de30dd60f84de17e3d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Aug 2011 14:05:51 -0400 Subject: [PATCH 076/186] Updating tools to use the RodBinding syntax --- build.xml | 2 +- .../sting/gatk/CommandLineExecutable.java | 2 +- .../arguments/GATKArgumentCollection.java | 8 --- .../gatk/refdata/RefMetaDataTracker.java | 61 ++++++++++++------- .../sting/gatk/walkers/PileupWalker.java | 18 +++--- .../beagle/BeagleOutputToVCFWalker.java | 49 ++++++++------- .../beagle/ProduceBeagleInputWalker.java | 24 ++++---- .../VariantsToBeagleUnphasedWalker.java | 11 ++-- .../GCContentByIntervalWalker.java | 3 +- .../fasta/FastaAlternateReferenceWalker.java | 24 ++++---- .../filters/VariantFiltrationWalker.java | 12 ++-- .../evaluators/VariantEvaluator.java | 19 +++--- .../varianteval/util/VariantEvalUtils.java | 13 +--- .../walkers/variantutils/CombineVariants.java | 1 - .../variantutils/FilterLiftedVariants.java | 14 +++-- .../variantutils/LeftAlignVariants.java | 14 +++-- .../variantutils/LiftoverVariants.java | 16 +++-- .../variantutils/RandomlySplitVariants.java | 18 +++--- .../walkers/variantutils/SelectVariants.java | 16 ++--- .../variantutils/ValidateVariants.java | 17 +++--- .../VariantValidationAssessor.java | 13 ++-- .../walkers/variantutils/VariantsToTable.java | 8 ++- .../walkers/variantutils/VariantsToVCF.java | 21 ++++--- .../sting/utils/text/ListFileUtils.java | 14 +---- .../org/broadinstitute/sting/BaseTest.java | 4 +- .../GATKArgumentCollectionUnitTest.java | 1 - .../VariantAnnotatorIntegrationTest.java | 2 +- ...astaAlternateReferenceIntegrationTest.java | 11 ++-- .../UnifiedGenotyperPerformanceTest.java | 6 +- .../indels/IndelRealignerIntegrationTest.java | 6 +- .../indels/IndelRealignerPerformanceTest.java | 2 +- .../RecalibrationWalkersIntegrationTest.java | 10 +-- .../RecalibrationWalkersPerformanceTest.java | 4 +- .../VariantEvalIntegrationTest.java | 24 ++++---- .../VariantContextIntegrationTest.java | 33 ++++------ 35 files changed, 245 insertions(+), 256 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/{ => coverage}/GCContentByIntervalWalker.java (96%) diff --git a/build.xml b/build.xml index 438e9c90c..1e4badc2c 100644 --- a/build.xml +++ b/build.xml @@ -168,7 +168,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index ebdafc703..efdc64066 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -114,7 +114,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { logger.warn("################################################################################"); } - Collection oldStyle = ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings, getArgumentCollection().DBSNPFile, parser); + Collection oldStyle = ListFileUtils.unpackRODBindingsOldStyle(getArgumentCollection().RODBindings, parser); oldStyle.addAll(newStyle); engine.setReferenceMetaDataFiles(oldStyle); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index ee2e85025..62135f21b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -117,11 +117,6 @@ public class GATKArgumentCollection { @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) public boolean nonDeterministicRandomSeed = false; - - @Element(required = false) - @Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false) - public String DBSNPFile = null; - /** * The override mechanism in the GATK, by default, populates the command-line arguments, then * the defaults from the walker annotations. Unfortunately, walker annotations should be trumped @@ -380,9 +375,6 @@ public class GATKArgumentCollection { if (!other.excludeIntervals.equals(this.excludeIntervals)) { return false; } - if (!other.DBSNPFile.equals(this.DBSNPFile)) { - return false; - } if (!other.unsafe.equals(this.unsafe)) { return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 7ee560d1d..297c163ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -68,66 +68,81 @@ public class RefMetaDataTracker { // // ------------------------------------------------------------------------------------------ - public List getValues(Class type) { + public List getValues(final Class type) { return addValues(map.keySet(), type, new ArrayList(), null, false, false); } - public List getValues(Class type, final GenomeLoc onlyAtThisLoc) { + public List getValues(final Class type, final GenomeLoc onlyAtThisLoc) { return addValues(map.keySet(), type, new ArrayList(), onlyAtThisLoc, true, false); } - public List getValues(Class type, final String name) { + public List getValues(final Class type, final String name) { return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false); } - public List getValues(Class type, final String name, final GenomeLoc onlyAtThisLoc) { + public List getValues(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false); } - public List getValues(Class type, final Collection names) { + public List getValues(final Class type, final Collection names) { return addValues(names, type, new ArrayList(), null, false, false); } - public List getValues(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + public List getValues(final Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { return addValues(names, type, new ArrayList(), onlyAtThisLoc, true, false); } - public T getFirstValue(Class type) { + public T getFirstValue(final Class type) { return safeGetFirst(getValues(type)); } - public T getFirstValue(Class type, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(final Class type, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, onlyAtThisLoc)); } - public T getFirstValue(Class type, final String name) { + public T getFirstValue(final Class type, final String name) { return safeGetFirst(getValues(type, name)); } - public T getFirstValue(Class type, final String name, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, name, onlyAtThisLoc)); } - public T getFirstValue(Class type, final Collection names) { + public T getFirstValue(final Class type, final Collection names) { return safeGetFirst(getValues(type, names)); } - public T getFirstValue(Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(final Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, names, onlyAtThisLoc)); } // // ROD binding accessors // - public List getValues(RodBinding rodBinding) { + public List getValues(final RodBinding rodBinding) { return getValues(rodBinding.getType(), rodBinding.getVariableName()); } - public List getValues(RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + + public List getValues(final Collection> rodBindings) { + List results = new ArrayList(); + for ( RodBinding rodBinding : rodBindings ) + results.addAll(getValues(rodBinding)); + return results; + } + + public List getValues(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { return getValues(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); } - public T getFirstValue(RodBinding rodBinding) { + public List getValues(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { + List results = new ArrayList(); + for ( RodBinding rodBinding : rodBindings ) + results.addAll(getValues(rodBinding, onlyAtThisLoc)); + return results; + } + + public T getFirstValue(final RodBinding rodBinding) { return getFirstValue(rodBinding.getType(), rodBinding.getVariableName()); } - public T getFirstValue(RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { return getFirstValue(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); } - public boolean hasValues(RodBinding rodBinding) { + public boolean hasValues(final RodBinding rodBinding) { return hasValues(rodBinding.getVariableName()); } - public List getValuesAsGATKFeatures(RodBinding rodBinding) { + public List getValuesAsGATKFeatures(final RodBinding rodBinding) { return getValuesAsGATKFeatures(rodBinding.getVariableName()); } @@ -142,7 +157,7 @@ public class RefMetaDataTracker { * @param * @return */ - final private T safeGetFirst(List l) { + final private T safeGetFirst(final List l) { // todo: should we be warning people here? Throwing an error? return l.isEmpty() ? null : l.get(0); } @@ -277,10 +292,12 @@ public class RefMetaDataTracker { for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); - } + } + + return values; + } + - return values; - } private List addValues(final String name, final Class type, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index 1484841b3..fd9bf5734 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -68,6 +70,9 @@ public class PileupWalker extends LocusWalker implements TreeR @Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events") public boolean SHOW_INDEL_PILEUPS = false; + @Argument(fullName="rodBind",shortName="-B",doc="Add these ROD bindings to the output Pileup", required=false) + public List> rods; + public void initialize() { } @@ -112,18 +117,11 @@ public class PileupWalker extends LocusWalker implements TreeR */ private String getReferenceOrderedData( RefMetaDataTracker tracker ) { ArrayList rodStrings = new ArrayList(); - for ( GATKFeature datum : tracker.getAllValuesAsGATKFeatures() ) { - if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) { - rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it - } + for ( Feature datum : tracker.getValues(rods) ) { + rodStrings.add(datum.toString()); } String rodString = Utils.join(", ", rodStrings); - DbSNPFeature dbsnp = tracker.getFirstValue(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class); - - if ( dbsnp != null) - rodString += DbSNPHelper.toMediumString(dbsnp); - if ( !rodString.equals("") ) rodString = "[ROD: " + rodString + "]"; @@ -132,8 +130,6 @@ public class PileupWalker extends LocusWalker implements TreeR @Override public void onTraversalDone(Integer result) { - // Double check traversal result to make count is the same. - // TODO: Is this check necessary? out.println("[REDUCE RESULT] Traversal result is: " + result); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 97a4b6a8f..e4773bf4d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.beagle; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -51,15 +53,22 @@ import static java.lang.Math.log10; /** * Takes files produced by Beagle imputation engine and creates a vcf with modified annotations. */ -@Requires(value={},referenceMetaData=@RMD(name=BeagleOutputToVCFWalker.INPUT_ROD_NAME, type=VariantContext.class)) - +@Requires(value={}) public class BeagleOutputToVCFWalker extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; - public static final String INPUT_ROD_NAME = "variant"; - public static final String COMP_ROD_NAME = "comp"; - public static final String R2_ROD_NAME = "beagleR2"; - public static final String PROBS_ROD_NAME = "beagleProbs"; - public static final String PHASED_ROD_NAME = "beaglePhased"; + @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) + public RodBinding comp; + + @Input(fullName="beagleR2", shortName = "beagleR2", doc="VCF file", required=true) + public RodBinding beagleR2; + + @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="VCF file", required=true) + public RodBinding beagleProbs; + + @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="VCF file", required=true) + public RodBinding beaglePhased; @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -98,7 +107,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { final List dataSources = this.getToolkit().getRodDataSources(); for( final ReferenceOrderedDataSource source : dataSources ) { - if (source.getName().equals(COMP_ROD_NAME)) { + if (source.getName().equals(comp.getVariableName())) { hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); @@ -107,7 +116,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME)); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); @@ -119,9 +128,9 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; GenomeLoc loc = context.getLocation(); - VariantContext vc_input = tracker.getFirstValue(VariantContext.class, INPUT_ROD_NAME, loc); + VariantContext vc_input = tracker.getFirstValue(variants, loc); - VariantContext vc_comp = tracker.getFirstValue(VariantContext.class, COMP_ROD_NAME, loc); + VariantContext vc_comp = tracker.getFirstValue(comp, loc); if ( vc_input == null ) return 0; @@ -130,30 +139,24 @@ public class BeagleOutputToVCFWalker extends RodWalker { vcfWriter.add(vc_input, ref.getBase()); return 1; } - List r2rods = tracker.getValues(R2_ROD_NAME); + BeagleFeature beagleR2Feature = tracker.getFirstValue(beagleR2); // ignore places where we don't have a variant - if ( r2rods.size() == 0 ) + if ( beagleR2Feature == null ) return 0; - BeagleFeature beagleR2Feature = (BeagleFeature)r2rods.get(0); - List gProbsrods = tracker.getValues(PROBS_ROD_NAME); + BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs); // ignore places where we don't have a variant - if ( gProbsrods.size() == 0 ) + if ( beagleProbsFeature == null ) return 0; - BeagleFeature beagleProbsFeature = (BeagleFeature)gProbsrods.get(0); - - List gPhasedrods = tracker.getValues(PHASED_ROD_NAME); - + BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased); // ignore places where we don't have a variant - if ( gPhasedrods.size() == 0 ) + if ( beaglePhasedFeature == null ) return 0; - BeagleFeature beaglePhasedFeature = (BeagleFeature)gPhasedrods.get(0); - // get reference base for current position byte refByte = ref.getBase(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 354b032f2..291ef7201 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -54,10 +51,13 @@ import java.util.*; /** * Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file */ -@Requires(value={},referenceMetaData=@RMD(name=ProduceBeagleInputWalker.ROD_NAME, type=VariantContext.class)) +@Requires(value={}) public class ProduceBeagleInputWalker extends RodWalker { - public static final String ROD_NAME = "variant"; - public static final String VALIDATION_ROD_NAME = "validation"; + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; + + @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) + public RodBinding validation; @Output(doc="File to which BEAGLE input should be written",required=true) protected PrintStream beagleWriter = null; @@ -99,7 +99,7 @@ public class ProduceBeagleInputWalker extends RodWalker { public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(ROD_NAME)); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); beagleWriter.print("marker alleleA alleleB"); for ( String sample : samples ) @@ -121,8 +121,8 @@ public class ProduceBeagleInputWalker extends RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext variant_eval = tracker.getFirstValue(VariantContext.class, ROD_NAME, loc); - VariantContext validation_eval = tracker.getFirstValue(VariantContext.class, VALIDATION_ROD_NAME, loc); + VariantContext variant_eval = tracker.getFirstValue(variants, loc); + VariantContext validation_eval = tracker.getFirstValue(validation, loc); if ( goodSite(variant_eval,validation_eval) ) { if ( useValidation(validation_eval, ref) ) { @@ -303,9 +303,7 @@ public class ProduceBeagleInputWalker extends RodWalker { } private void initializeVcfWriter() { - - final ArrayList inputNames = new ArrayList(); - inputNames.add( VALIDATION_ROD_NAME ); + final List inputNames = Arrays.asList(validation.getVariableName()); // setup the header fields Set hInfo = new HashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index b95f3097d..3c221087a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.beagle; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -54,9 +56,10 @@ import java.util.Set; * in input variant file. Will additional hold back a fraction of the sites for evaluation, marking the * genotypes at that sites as missing, and writing the truth of these sites to a second VCF file */ -@Requires(value={},referenceMetaData=@RMD(name= VariantsToBeagleUnphasedWalker.ROD_NAME, type=VariantContext.class)) +@Requires(value={}) public class VariantsToBeagleUnphasedWalker extends RodWalker { - public static final String ROD_NAME = "variant"; + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which BEAGLE unphased genotypes should be written",required=true) protected PrintStream beagleWriter = null; @@ -75,7 +78,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker private int testSetSize = 0; public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(ROD_NAME)); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); beagleWriter.print("I marker alleleA alleleB"); for ( String sample : samples ) @@ -102,7 +105,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext vc = tracker.getFirstValue(VariantContext.class, ROD_NAME, loc); + VariantContext vc = tracker.getFirstValue(variants, loc); if ( ProduceBeagleInputWalker.canBeOutputToBeagle(vc) ) { // do we want to hold back this site? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java index 68bea4dba..a4944e939 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/GCContentByIntervalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByIntervalWalker.java @@ -22,12 +22,13 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.walkers; +package org.broadinstitute.sting.gatk.walkers.coverage; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 2865c2380..fe58fb038 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.fasta; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -45,6 +47,8 @@ import java.util.Collection; @Reference(window=@Window(start=-1,stop=50)) @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { + @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=true) + public RodBinding snpmask; private int deletionBasesRemaining = 0; @@ -57,20 +61,18 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { String refBase = String.valueOf((char)ref.getBase()); - Collection vcs = tracker.getValues(VariantContext.class); + Collection vcs = tracker.getValues(snpmask); // Check to see if we have a called snp for ( VariantContext vc : vcs ) { - if ( !vc.getSource().startsWith("snpmask") ) { - if ( vc.isDeletion()) { - deletionBasesRemaining = vc.getReference().length(); - // delete the next n bases, not this one - return new Pair(context.getLocation(), refBase); - } else if ( vc.isInsertion()) { - return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); - } else if (vc.isSNP()) { - return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); - } + if ( vc.isDeletion()) { + deletionBasesRemaining = vc.getReference().length(); + // delete the next n bases, not this one + return new Pair(context.getLocation(), refBase); + } else if ( vc.isInsertion()) { + return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); + } else if (vc.isSNP()) { + return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 9e89944a5..22c45df19 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -46,9 +48,11 @@ import java.util.*; /** * Filters variant calls using a number of user-selectable, parameterizable criteria. */ -@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class)) +@Requires(value={}) @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltrationWalker extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which variants should be written", required=true) protected VCFWriter writer = null; @@ -80,7 +84,6 @@ public class VariantFiltrationWalker extends RodWalker { List filterExps; List genotypeFilterExps; - public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant"; public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; private ClusteredSnps clusteredSNPs = null; private GenomeLoc previousMaskPosition = null; @@ -92,8 +95,7 @@ public class VariantFiltrationWalker extends RodWalker { private void initializeVcfWriter() { - final ArrayList inputNames = new ArrayList(); - inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME ); + final List inputNames = Arrays.asList(variants.getVariableName()); // setup the header fields Set hInfo = new HashSet(); @@ -149,7 +151,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(VariantContext.class, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation()); + Collection VCs = tracker.getValues(variants, context.getLocation()); // is there a SNP mask present? boolean hasMask = tracker.getValues("mask").size() > 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index e29e7ed50..83a1c2f3b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -8,6 +8,8 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.NewEvaluationConte import org.broadinstitute.sting.gatk.walkers.varianteval.util.StateKey; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.util.Collection; + public abstract class VariantEvaluator { public void initialize(VariantEvalWalker walker) {} @@ -17,25 +19,18 @@ public abstract class VariantEvaluator { public abstract int getComparisonOrder(); // called at all sites, regardless of eval context itself; useful for counting processed bases - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + } + + public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return null; } - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) { - return update1(vc1, tracker, ref, context); - } - - - public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return null; } - public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) { - return update2(vc1, vc2, tracker, ref, context); - } - public void finalizeEvaluation() {} protected double rate(long n, long d) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 79cd89ca3..61a959c99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -330,9 +330,7 @@ public class VariantEvalUtils { * to do this) * @return a mapping of track names to a list of VariantContext objects */ - public HashMap> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean subsetBySample, boolean trackPerSample) { - HashMap> bindings = new HashMap>(); - + protected void bindVariantContexts(HashMap> bindings, RefMetaDataTracker tracker, ReferenceContext ref, Set trackNames, EnumSet allowableTypes, boolean byFilter, boolean subsetBySample, boolean trackPerSample) { for (String trackName : trackNames) { HashMap vcs = new HashMap(); @@ -364,8 +362,6 @@ public class VariantEvalUtils { bindings.put(trackName, vcs); } } - - return bindings; } /** @@ -393,11 +389,8 @@ public class VariantEvalUtils { } } - HashMap> evalBindings = bindVariantContexts(tracker, ref, evalNames, allowableTypes, byFilter, true, perSampleIsEnabled); - HashMap> compBindings = bindVariantContexts(tracker, ref, compNames, allowableTypes, byFilter, false, false); - - vcs.putAll(compBindings); - vcs.putAll(evalBindings); + bindVariantContexts(vcs, tracker, ref, evalNames, allowableTypes, byFilter, true, perSampleIsEnabled); + bindVariantContexts(vcs, tracker, ref, compNames, allowableTypes, byFilter, false, false); return vcs; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index eec9bda79..6d1d75c03 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -54,7 +54,6 @@ import java.util.*; @Reference(window=@Window(start=-50,stop=50)) @Requires(value={}) public class CombineVariants extends RodWalker { - @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 2a5a4e97c..4f36bb6a3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -24,7 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -44,8 +46,10 @@ import java.util.Set; * Filters a lifted-over VCF file for ref bases that have been changed. */ @Reference(window=@Window(start=0,stop=100)) -@Requires(value={},referenceMetaData=@RMD(name="variant",type= VariantContext.class)) +@Requires(value={}) public class FilterLiftedVariants extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; private static final int MAX_VARIANT_SIZE = 100; @@ -55,10 +59,10 @@ public class FilterLiftedVariants extends RodWalker { private long failedLocs = 0, totalLocs = 0; public void initialize() { - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getVariableName())); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(variants.getVariableName()) ? vcfHeaders.get(variants.getVariableName()).getMetaData() : null, samples); writer.writeHeader(vcfHeader); } @@ -85,7 +89,7 @@ public class FilterLiftedVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); + Collection VCs = tracker.getValues(variants, context.getLocation()); for ( VariantContext vc : VCs ) filterAndWrite(ref.getBases(), vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index e7e21d256..38f6a2f39 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -28,7 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -46,8 +48,10 @@ import java.util.*; * Left-aligns indels from a variants file. */ @Reference(window=@Window(start=-200,stop=200)) -@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class)) +@Requires(value={}) public class LeftAlignVariants extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which variants should be written",required=true) protected VCFWriter baseWriter = null; @@ -55,10 +59,10 @@ public class LeftAlignVariants extends RodWalker { private SortingVCFWriter writer; public void initialize() { - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getVariableName())); - Set headerLines = vcfHeaders.get("variant").getMetaData(); + Set headerLines = vcfHeaders.get(variants.getVariableName()).getMetaData(); baseWriter.writeHeader(new VCFHeader(headerLines, samples)); writer = new SortingVCFWriter(baseWriter, 200); @@ -68,7 +72,7 @@ public class LeftAlignVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); + Collection VCs = tracker.getValues(variants, context.getLocation()); int changedSites = 0; for ( VariantContext vc : VCs ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 5e85d03cb..4a4ab1eb6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -30,7 +30,9 @@ import net.sf.picard.util.Interval; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -49,8 +51,10 @@ import java.util.*; /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. */ -@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class)) +@Requires(value={}) public class LiftoverVariants extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which variants should be written",required=true) protected File file = null; @@ -85,12 +89,12 @@ public class LiftoverVariants extends RodWalker { throw new UserException.BadInput("the chain file you are using is not compatible with the reference you are trying to lift over to; please use the appropriate chain file for the given reference"); } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getVariableName())); Set metaData = new HashSet(); - if ( vcfHeaders.containsKey("variant") ) - metaData.addAll(vcfHeaders.get("variant").getMetaData()); + if ( vcfHeaders.containsKey(variants.getVariableName()) ) + metaData.addAll(vcfHeaders.get(variants.getVariableName()).getMetaData()); if ( RECORD_ORIGINAL_LOCATION ) { metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record")); metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record")); @@ -143,7 +147,7 @@ public class LiftoverVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); + Collection VCs = tracker.getValues(variants, context.getLocation()); for ( VariantContext vc : VCs ) convertAndWrite(vc, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index b3926bc34..8ccdef2d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -25,7 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -39,16 +41,15 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; +import java.util.*; /** * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results. */ -@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class)) +@Requires(value={}) public class RandomlySplitVariants extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true) protected VCFWriter vcfWriter1 = null; @@ -61,8 +62,6 @@ public class RandomlySplitVariants extends RodWalker { @Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false) protected double fraction = 0.5; - protected static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant"; - protected int iFraction; /** @@ -74,8 +73,7 @@ public class RandomlySplitVariants extends RodWalker { iFraction = (int)(fraction * 1000.0); // setup the header info - final ArrayList inputNames = new ArrayList(); - inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME ); + final List inputNames = Arrays.asList(variants.getVariableName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames); Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); @@ -97,7 +95,7 @@ public class RandomlySplitVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getValues(VariantContext.class, INPUT_VARIANT_ROD_BINDING_NAME, context.getLocation()); + Collection vcs = tracker.getValues(variants, context.getLocation()); for ( VariantContext vc : vcs ) { int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); if ( random < iFraction ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index dc24287e9..1926d68a0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -24,7 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -32,8 +32,6 @@ import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -54,8 +52,10 @@ import java.util.*; * Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria, * recompute the value of certain annotations based on the new sample set, and output a new VCF with the results. */ -@Requires(value={},referenceMetaData=@RMD(name="variant", type=VariantContext.class)) +@Requires(value={}) public class SelectVariants extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -156,9 +156,6 @@ public class SelectVariants extends RodWalker { private Set mvSet = new HashSet(); - /* default name for the variant dataset (VCF) */ - private final String variantRodName = "variant"; - /* variables used by the SELECT RANDOM modules */ private boolean SELECT_RANDOM_NUMBER = false; @@ -183,8 +180,7 @@ public class SelectVariants extends RodWalker { */ public void initialize() { // Get list of samples to include in the output - ArrayList rodNames = new ArrayList(); - rodNames.add(variantRodName); + List rodNames = Arrays.asList(variants.getVariableName()); Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); @@ -308,7 +304,7 @@ public class SelectVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getValues(VariantContext.class, variantRodName, context.getLocation()); + Collection vcs = tracker.getValues(variants, context.getLocation()); if ( vcs == null || vcs.size() == 0) { return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 6655d26dc..775d749b6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -29,6 +29,8 @@ import org.broad.tribble.TribbleException; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -50,10 +52,10 @@ import java.util.Set; * Validates a variants file. */ @Reference(window=@Window(start=0,stop=100)) -@Requires(value={},referenceMetaData=@RMD(name=ValidateVariants.TARGET_ROD_NAME, type=VariantContext.class)) +@Requires(value={}) public class ValidateVariants extends RodWalker { - - protected static final String TARGET_ROD_NAME = "variant"; + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; public enum ValidationType { ALL, REF, IDS, ALLELES, CHR_COUNTS @@ -74,19 +76,14 @@ public class ValidateVariants extends RodWalker { private File file = null; public void initialize() { - for ( ReferenceOrderedDataSource source : getToolkit().getRodDataSources() ) { - if ( source.getName().equals(TARGET_ROD_NAME) ) { - file = source.getFile(); - break; - } - } + file = new File(variants.getSource()); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); + Collection VCs = tracker.getValues(variants, context.getLocation()); for ( VariantContext vc : VCs ) validate(vc, tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 245ed9edd..93f5c872f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -45,10 +47,10 @@ import java.util.*; * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes) */ @Reference(window=@Window(start=0,stop=40)) -@Requires(value={},referenceMetaData=@RMD(name=VariantValidationAssessor.INPUT_VARIANT_ROD_BINDING_NAME, type=VariantContext.class)) +@Requires(value={}) public class VariantValidationAssessor extends RodWalker,Integer> { - - public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant"; + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfwriter = null; @@ -93,7 +95,7 @@ public class VariantValidationAssessor extends RodWalker inputNames = new ArrayList(); - inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME ); + final List inputNames = Arrays.asList(variants.getVariableName()); // setup the header fields Set hInfo = new HashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 4f0821477..fb6ccfb76 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -24,6 +24,8 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; @@ -45,6 +47,9 @@ import java.util.*; */ @Requires(value={}) public class VariantsToTable extends RodWalker { + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; + @Output(doc="File to which results should be written",required=true) protected PrintStream out; @@ -132,8 +137,7 @@ public class VariantsToTable extends RodWalker { return 0; if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - Collection vcs = tracker.getValues(VariantContext.class, context.getLocation()); - for ( VariantContext vc : vcs) { + for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) { if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); out.println(Utils.join("\t", vals)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 39822d7a6..b57606927 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -29,7 +29,9 @@ import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -54,7 +56,7 @@ import java.util.*; /** * Converts variants from other file formats to VCF format. */ -@Requires(value={},referenceMetaData=@RMD(name=VariantsToVCF.INPUT_ROD_NAME, type=VariantContext.class)) +@Requires(value={}) @Reference(window=@Window(start=-40,stop=40)) public class VariantsToVCF extends RodWalker { @@ -62,7 +64,8 @@ public class VariantsToVCF extends RodWalker { protected VCFWriter baseWriter = null; private SortingVCFWriter vcfwriter; // needed because hapmap indel records move - public static final String INPUT_ROD_NAME = "variant"; + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) protected String sampleName = null; @@ -98,8 +101,8 @@ public class VariantsToVCF extends RodWalker { } // set the appropriate sample name if necessary - if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(INPUT_ROD_NAME) ) { - Genotype g = Genotype.modifyName(vc.getGenotype(INPUT_ROD_NAME), sampleName); + if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getVariableName()) ) { + Genotype g = Genotype.modifyName(vc.getGenotype(variants.getVariableName()), sampleName); Map genotypes = new HashMap(); genotypes.put(sampleName, g); vc = VariantContext.modifyGenotypes(vc, genotypes); @@ -114,7 +117,7 @@ public class VariantsToVCF extends RodWalker { private Collection getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) { // we need to special case the HapMap format because indels aren't handled correctly - List features = tracker.getValues(INPUT_ROD_NAME); + List features = tracker.getValues(variants.getVariableName()); if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) { ArrayList hapmapVCs = new ArrayList(features.size()); for ( Object feature : features ) { @@ -148,7 +151,7 @@ public class VariantsToVCF extends RodWalker { } refBase = ref.getBases()[hapmap.getStart() - ref.getWindow().getStart()]; } - VariantContext vc = VariantContextAdaptors.toVariantContext(INPUT_ROD_NAME, hapmap, ref); + VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getVariableName(), hapmap, ref); if ( vc != null ) { if ( refBase != null ) { Map attrs = new HashMap(vc.getAttributes()); @@ -162,7 +165,7 @@ public class VariantsToVCF extends RodWalker { } // for everything else, we can just convert to VariantContext - return tracker.getValues(VariantContext.class, INPUT_ROD_NAME, ref.getLocus()); + return tracker.getValues(variants, ref.getLocus()); } private DbSNPFeature getDbsnpFeature(String rsID) { @@ -216,10 +219,10 @@ public class VariantsToVCF extends RodWalker { samples.add(sampleName); } else { // try VCF first - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(INPUT_ROD_NAME)); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); if ( samples.isEmpty() ) { - List rods = tracker.getValues(INPUT_ROD_NAME); + List rods = tracker.getValues(variants.getVariableName()); if ( rods.size() == 0 ) throw new IllegalStateException("No rod data is present"); diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index b8e39fb61..d758b4e5c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -93,7 +93,8 @@ public class ListFileUtils { * @param RODBindings a text equivale * @return a list of expanded, bound RODs. */ - public static Collection unpackRODBindings(final Collection RODBindings, final String dbSNPFile, final ParsingEngine parser) { + @Deprecated + public static Collection unpackRODBindingsOldStyle(final Collection RODBindings, final ParsingEngine parser) { // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); @@ -122,17 +123,6 @@ public class ListFileUtils { rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags)); } - if (dbSNPFile != null) { - if(dbSNPFile.toLowerCase().contains("vcf")) - throw new UserException("--DBSNP (-D) argument currently does not support VCF. To use dbSNP in VCF format, please use -B:dbsnp,vcf ."); - - final Tags tags = parser.getTags(dbSNPFile); - String fileName = expandFileName(dbSNPFile); - RMDTriplet.RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDTriplet.RMDStorageType.STREAM : RMDTriplet.RMDStorageType.FILE; - - rodBindings.add(new RMDTriplet(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME,"dbsnp",fileName,storageType,tags)); - } - return rodBindings; } diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index ef46d4bff..13b3e1df3 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -64,9 +64,7 @@ public abstract class BaseTest { public static final String b37Refseq = refseqAnnotationLocation + "refGene-big-table-b37.txt"; public static final String dbsnpDataLocation = GATKDataLocation; - public static final String hg18dbSNP129 = dbsnpDataLocation + "dbsnp_129_hg18.rod"; - public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.rod"; - public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.rod"; + public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf"; public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf"; public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java index 59edf934e..f3e868474 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java @@ -81,7 +81,6 @@ public class GATKArgumentCollectionUnitTest extends BaseTest { collect.samFiles = input; collect.strictnessLevel = SAMFileReader.ValidationStringency.STRICT; collect.referenceFile = new File("referenceFile".toLowerCase()); - collect.DBSNPFile = "DBSNPFile".toLowerCase(); collect.unsafe = ValidationExclusion.TYPE.ALL; collect.downsampleFraction = null; collect.downsampleCoverage = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index e6300e6c9..5a6a66bbd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, + baseTestString() + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 0c034eba9..9db2c82c7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -29,10 +29,11 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { Arrays.asList("3a48986c3832a768b478c3e95f994b0f")); executeTest("testFastaAlternateReferenceIndels", spec2); - WalkerTestSpec spec3 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:snps,GeliText " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B:snpmask,dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", - 1, - Arrays.asList("82705a88f6fc25880dd2331183531d9a")); - executeTest("testFastaAlternateReferenceSnps", spec3); + // TODO : Eric, update with new DBSNP +// WalkerTestSpec spec3 = new WalkerTestSpec( +// "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:snps,GeliText " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B:snpmask,dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", +// 1, +// Arrays.asList("82705a88f6fc25880dd2331183531d9a")); +// executeTest("testFastaAlternateReferenceSnps", spec3); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index 866c27f8d..738580ab1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -D " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -o /dev/null", 0, new ArrayList(0)); @@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -D " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -o /dev/null", 0, new ArrayList(0)); @@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -L chr1:1-50,000,000" + " -nt 10" + - " -D " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -o /dev/null", 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 2676f7067..bcb0c3a57 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -34,7 +34,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest("test realigner defaults with VCF", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( - baseCommand + "-D " + GATKDataLocation + "dbsnp_129_b36.rod", + baseCommand + "-B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", 1, Arrays.asList(base_md5)); executeTest("realigner defaults with dbsnp", spec3); @@ -50,7 +50,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest("realigner known indels only from VCF", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod", + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", 1, Arrays.asList("05a114623c126b0398fbc1703437461e")); executeTest("realigner known indels only from dbsnp", spec2); @@ -65,7 +65,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest("realigner use SW from VCF", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel USE_SW -D " + GATKDataLocation + "dbsnp_129_b36.rod", + baseCommand + "--consensusDeterminationModel USE_SW -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", 1, Arrays.asList(base_md5_with_SW_or_VCF)); executeTest("realigner use SW from dbsnp", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java index fd5ad0b22..2c702dec4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java @@ -30,7 +30,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -D /humgen/gsa-hpprojects/GATK/data/dbsnp_129_hg18.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-5,650,000" + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 0992dbe31..97748cf7f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -52,7 +52,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -T CountCovariates" + " -I " + bam + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) @@ -129,7 +129,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf", + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -176,7 +176,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -281,7 +281,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -306,7 +306,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_132_b37.vcf" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index ade34c964..1a063ec2a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); @@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + " -standard" + " -OQ" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 38663ad42..1362a1f88 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -36,7 +36,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -56,7 +56,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -77,7 +77,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -99,7 +99,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -120,7 +120,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -141,7 +141,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -162,7 +162,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -183,7 +183,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -206,7 +206,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -342,7 +342,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", @@ -360,7 +360,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, "-noEV", "-EV CompOverlap", @@ -381,7 +381,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-D " + b37dbSNP129, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", "-B:eval,VCF " + fundamentalTestSNPsVCF, "-noEV", "-EV CountVariants", diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 1a2285b22..772112026 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -15,7 +15,7 @@ public class VariantContextIntegrationTest extends WalkerTest { " -R " + b36KGReference; private static String root = cmdRoot + - " -D " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -L 1:1-1,000,000 -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; private static final class VCITTest extends TestDataProvider { @@ -30,17 +30,15 @@ public class VariantContextIntegrationTest extends WalkerTest { @DataProvider(name = "VCITTestData") public Object[][] createVCITTestData() { - new VCITTest("-L 1:1-10000 --printPerLocus", "e4ee2eaa3114888e918a1c82df7a027a"); - new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "033bd952fca048fe1a4f6422b57ab2ed"); - new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "e5a00766f8c1ff9cf92310bafdec3126"); - new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); - - // TODO : Eric, these are bad because the conversion fails - //new VCITTest("-L 1:1-10000 --printPerLocus --takeFirstOnly", "5b5635e4877d82e8a27d70dac24bda2f"); - //new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5e40980c02797f90821317874426a87a"); - //new VCITTest("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "ceced3f270b4fe407ee83bc9028becde"); - //new VCITTest("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9a9b9e283553c28bf58de1cafa38fe92"); + new VCITTest("--printPerLocus", "f36b81b8bcd210c0e3a1058d791b78ec"); + new VCITTest("--printPerLocus --onlyContextsOfType SNP", "a77492ba003a1fca8d8e0227fa642f34"); + new VCITTest("--printPerLocus --onlyContextsOfType INDEL", "9e0375a1b680d7df0971dbf256944d7a"); + new VCITTest("--printPerLocus --onlyContextsOfType MIXED", "93628cbba30033398e7e680b92cb3680"); + new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); + new VCITTest("--printPerLocus --takeFirstOnly", "c4a3d7545d26880635e0e5e4e69952e2"); + new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "22a7bb9e63d5f2950322c26397670e5c"); + new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", "6387c1a400d1872ae4394d01e533c296"); + new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "dde3a3db4d9c57f5042e0dfe03380987"); return VCITTest.getTests(VCITTest.class); } @@ -53,7 +51,7 @@ public class VariantContextIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s", 1, // just one output file Arrays.asList(md5)); - executeTest("testDbSNPAndVCFConversions", spec); + executeTest("testSelectors", spec); } @Test @@ -65,13 +63,4 @@ public class VariantContextIntegrationTest extends WalkerTest { Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); } - - @Test - public void testLargeScaleConversion() { - // this really just tests that we are seeing the same number of objects over all of chr1 - WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", - 1, // just one output file - Arrays.asList("529f936aa6c303658b23caf4e527782f")); - executeTest("testLargeScaleConversion", spec); - } } From e4a67f3df17ec4f3ca5fbd2bafc7c9f768b29f64 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Aug 2011 14:28:35 -0400 Subject: [PATCH 077/186] RefMetaDataTracker has complete set of get() functions for List> Including unit tests --- .../gatk/refdata/RefMetaDataTracker.java | 21 +++++++++++++++- .../refdata/RefMetaDataTrackerUnitTest.java | 25 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 297c163ab..d37839eff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -32,7 +32,6 @@ import java.util.*; */ public class RefMetaDataTracker { // TODO: this should be a list, not a map, actually - private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY"); final Map map; @@ -138,6 +137,26 @@ public class RefMetaDataTracker { return getFirstValue(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); } + public T getFirstValue(final Collection> rodBindings) { + for ( RodBinding rodBinding : rodBindings ) { + T val = getFirstValue(rodBinding); + if ( val != null ) + return val; + } + return null; + } + + public T getFirstValue(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { + for ( RodBinding rodBinding : rodBindings ) { + T val = getFirstValue(rodBinding, onlyAtThisLoc); + if ( val != null ) + return val; + } + return null; + } + + + public boolean hasValues(final RodBinding rodBinding) { return hasValues(rodBinding.getVariableName()); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index ab67e48e1..1e1c11ab8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -226,7 +226,7 @@ public class RefMetaDataTrackerUnitTest { RefMetaDataTracker tracker = test.makeTracker(); for ( String nameAsString : Arrays.asList("A", "B") ) { - RodBinding binding = new RodBinding(Feature.class, nameAsString, "none", new Tags()); + RodBinding binding = new RodBinding(Feature.class, nameAsString, "none", new Tags()); List v1 = tracker.getValues(binding); testGetter(nameAsString, v1, test.expected(nameAsString), true, tracker); @@ -241,6 +241,29 @@ public class RefMetaDataTrackerUnitTest { } } + @Test(enabled = true, dataProvider = "tests") + public void testGettersAsListOfRodBindings(MyTest test) { + logger.warn("Testing " + test + " for get() methods for List"); + RefMetaDataTracker tracker = test.makeTracker(); + + String nameAsString = "A+B"; + RodBinding A = new RodBinding(Feature.class, "A", "none", new Tags()); + RodBinding B = new RodBinding(Feature.class, "B", "none", new Tags()); + List> binding = Arrays.asList(A, B); + + List v1 = tracker.getValues(binding); + testGetter(nameAsString, v1, test.expected(nameAsString), true, tracker); + + List v2 = tracker.getValues(binding, locus); + testGetter(nameAsString, v2, startingHere(test.expected(nameAsString)), true, tracker); + + Feature v3 = tracker.getFirstValue(binding); + testGetter(nameAsString, Arrays.asList(v3), test.expected(nameAsString), false, tracker); + + Feature v4 = tracker.getFirstValue(binding, locus); + testGetter(nameAsString, Arrays.asList(v4), startingHere(test.expected(nameAsString)), false, tracker); + } + private List startingHere(List l) { List x = new ArrayList(); for ( GATKFeature f : l ) if ( f.getStart() == locus.getStart() ) x.add(f); From b2cde87378edfe5b3d5768d8935c5b824e3b585d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 2 Aug 2011 15:34:38 -0400 Subject: [PATCH 079/186] Removing --DBSNP syntax from BQSR integration tests --- .../RecalibrationWalkersIntegrationTest.java | 43 ++++++++++--------- .../RecalibrationWalkersPerformanceTest.java | 4 +- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 129161da3..049f44845 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -18,10 +18,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "7b5832d4b2a23b8ef2bb639eb59bfa88" ); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "9c006f8e9fb5752b1c139f5a8cc7ea88"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "e6f7b4ab9aa291022e0ba8b7dbe4c77e" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e6b98af01c5a08e4954b79ec42db6fc3" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "5a52b00d9794d27af723bcf93366681e" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "64e9f17a1cf6fc04c1f2717c2d2eca67" ); for ( String parallelism : Arrays.asList("", " -nt 4")) { for ( Map.Entry entry : e.entrySet() ) { @@ -30,7 +30,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -T CountCovariates" + " -I " + bam + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) @@ -52,10 +52,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibrator1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "6797d7ffa4ef6c48413719ba32696ccf"); - e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "2bb3374dde131791d7638031ae3b3e10" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "1f9d8944b73169b367cb83b0d22e5432" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c164dd635721ba6df3f06dac1877c32d"); + e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "2a37c6001826bfabf87063b1dfcf594f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -83,7 +83,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesUseOriginalQuals() { HashMap e = new HashMap(); - e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "3404965ec4fa99873fe6a44521944fd5"); + e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "278846c55d97bd9812b758468a83f559"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod", + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -107,7 +107,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0278cce4cfdab869dc0c11d6852a984b" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -136,7 +136,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c9ea5f995e1e2b7a5688533e678dcedc" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "8379f24cf5312587a1f92c162ecc220f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -144,7 +144,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -162,7 +162,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "993fae4270e7e1e15986f270acf247af" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "7d5edb75b176e4151de225f699719ee4" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -238,7 +238,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "a3d892bd60d8f679affda3c1e3af96c1"); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "9131d96f39badbf9753653f55b148012"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -249,7 +249,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -263,10 +263,11 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } + @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "284ccac1f8fe485e52c86333cac7c2d4" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "8993d32df5cb66c7149f59eccbd57f4c" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -274,7 +275,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + @@ -292,7 +293,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "c167799c2d9cab815d7c9b23337f162e" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "5f913c98ca99754902e9d34f99df468f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -315,7 +316,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } } - + @Test public void testCountCovariatesFailWithoutDBSNP() { HashMap e = new HashMap(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index ade34c964..08b9e0431 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " D:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); @@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + " -standard" + " -OQ" + - " --DBSNP " + GATKDataLocation + "dbsnp_129_hg18.rod" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); From 3a27a25cfced1be0b0dda79924bcd4769bb522b1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Aug 2011 20:11:24 -0400 Subject: [PATCH 081/186] Validates that the tribble binding provides the right object types at startup Tests to ensure this remains working --- .../sting/commandline/RodBinding.java | 13 +++-- .../sting/gatk/CommandLineExecutable.java | 6 +- .../gatk/refdata/tracks/RMDTrackBuilder.java | 31 +++++++--- .../sting/utils/text/ListFileUtils.java | 14 ++++- .../gatk/EngineFeaturesIntegrationTest.java | 58 +++++++++++++++++++ 5 files changed, 106 insertions(+), 16 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index 23acc2a78..f5621a780 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -39,10 +39,15 @@ import java.util.List; * There is no constraint on the type of the ROD bound. */ public class RodBinding { - final String variableName; - final String source; - final Tags tags; - final Class type; + final private String variableName; + final private String source; + final private Tags tags; + final private Class type; + + public boolean isBound() { + // todo : implement me + return source != null; + } public RodBinding(Class type, final String variableName, final String source, final Tags tags) { this.type = type; diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index efdc64066..32132c7ca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -38,10 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.text.ListFileUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; +import java.util.*; /** * @author aaron @@ -134,6 +131,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram { return 0; } + /** * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. * This report will be written to either STDOUT or to the run repository, depending on the options diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 41e8cf15b..a775a82d2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Broad Institute + * Copyright (c) 2011, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,15 +12,14 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.gatk.refdata.tracks; @@ -100,7 +99,7 @@ public class RMDTrackBuilder extends PluginManager { public RMDTrackBuilder(SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { - super(FeatureCodec.class, "Codecs", "Codec"); + this(); this.dict = dict; this.genomeLocParser = genomeLocParser; this.validationExclusionType = validationExclusionType; @@ -108,7 +107,21 @@ public class RMDTrackBuilder extends PluginManager { classes = new HashMap(); for (String name: this.getPluginsByName().keySet()) { classes.put(name.toUpperCase(), getPluginsByName().get(name)); - } } + } + } + + /** + * Limited constructor that produces a builder capable for validating types, but not building tracks + */ + public RMDTrackBuilder() { + super(FeatureCodec.class, "Codecs", "Codec"); + + classes = new HashMap(); + for (String name: this.getPluginsByName().keySet()) { + classes.put(name.toUpperCase(), getPluginsByName().get(name)); + } + } + /** @return a list of all available track types we currently have access to create */ public Map getAvailableTrackNamesAndTypes() { @@ -125,6 +138,10 @@ public class RMDTrackBuilder extends PluginManager { return classToRecord; } + public Class getFeatureCodecClass(RMDTriplet fileDescriptor) { + return getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + } + /** * create a RMDTrack of the specified type * @@ -136,7 +153,7 @@ public class RMDTrackBuilder extends PluginManager { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); - Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + Class featureCodecClass = getFeatureCodecClass(fileDescriptor); if (featureCodecClass == null) throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index d758b4e5c..3175037f6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -134,6 +135,7 @@ public class ListFileUtils { public static Collection unpackRODBindings(final Collection RODBindings, final ParsingEngine parser) { // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); + RMDTrackBuilder builderForValidation = new RMDTrackBuilder(); for (RodBinding rodBinding: RODBindings) { String argValue = rodBinding.getSource(); @@ -158,7 +160,17 @@ public class ListFileUtils { else storageType = RMDTriplet.RMDStorageType.FILE; - rodBindings.add(new RMDTriplet(name,type,fileName,storageType,tags)); + RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,tags); + + // validate triplet type + Class typeFromTribble = builderForValidation.getFeatureCodecClass(triplet); + if ( typeFromTribble != null && ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) + throw new UserException.BadArgumentValue(rodBinding.getVariableName(), + String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s", + rodBinding.getVariableName(), rodBinding.getType(), typeFromTribble)); + + + rodBindings.add(triplet); } return rodBindings; diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java new file mode 100644 index 000000000..2bdddafe3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; + +/** + * + */ +public class EngineFeaturesIntegrationTest extends WalkerTest { + private void testBadRODBindingInput(String type, String name) { + WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variants:" + type + " " + + b37dbSNP132 + " -R " + b37KGReference + " -o %s", + 1, UserException.class); + executeTest(name, spec); + } + + + @Test() private void testBadRODBindingInputType1() { + testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker"); + } + + @Test() private void testBadRODBindingInputType2() { + testBadRODBindingInput("vcf3", "VCF3 input to VCF expecting walker"); + } + + @Test() private void testBadRODBindingInputType3() { + testBadRODBindingInput("bed", "Bed input to VCF expecting walker"); + } + + @Test() private void testBadRODBindingInputTypeUnknownType() { + testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker"); + } +} From 83891271b5006735fe4dbc903e5d487ff07be17b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Aug 2011 20:28:47 -0400 Subject: [PATCH 082/186] --variants throughout integrationtests --- .../sting/commandline/RodBinding.java | 21 ++- .../beagle/BeagleOutputToVCFWalker.java | 2 +- .../beagle/ProduceBeagleInputWalker.java | 2 +- .../VariantsToBeagleUnphasedWalker.java | 2 +- .../fasta/FastaAlternateReferenceWalker.java | 28 ++-- .../filters/VariantFiltrationWalker.java | 7 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../variantutils/RandomlySplitVariants.java | 2 +- .../walkers/variantutils/SelectVariants.java | 26 ++-- .../variantutils/ValidateVariants.java | 4 +- .../VariantValidationAssessor.java | 2 +- .../walkers/variantutils/VariantsToTable.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 2 +- .../refdata/RefMetaDataTrackerUnitTest.java | 11 +- .../walkers/beagle/BeagleIntegrationTest.java | 22 +-- ...astaAlternateReferenceIntegrationTest.java | 2 +- .../VariantFiltrationIntegrationTest.java | 22 +-- .../DictionaryConsistencyIntegrationTest.java | 2 +- .../LiftoverVariantsIntegrationTest.java | 6 +- .../SelectVariantsIntegrationTest.java | 8 +- .../VCFStreamingIntegrationTest.java | 4 +- .../ValidateVariantsIntegrationTest.java | 2 +- .../VariantsToTableIntegrationTest.java | 2 +- .../VariantsToVCFIntegrationTest.java | 139 +++++++++--------- .../utils/codecs/vcf/VCFIntegrationTest.java | 4 +- 27 files changed, 173 insertions(+), 157 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index f5621a780..5260fce3b 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -39,14 +39,18 @@ import java.util.List; * There is no constraint on the type of the ROD bound. */ public class RodBinding { + public final static RodBinding makeUnbound(Class type) { + return new RodBinding(type); + } + final private String variableName; final private String source; final private Tags tags; final private Class type; + final private boolean bound; public boolean isBound() { - // todo : implement me - return source != null; + return bound; } public RodBinding(Class type, final String variableName, final String source, final Tags tags) { @@ -54,6 +58,19 @@ public class RodBinding { this.variableName = variableName; this.source = source; this.tags = tags; + this.bound = true; + } + + /** + * Make an unbound RodBinding + * @param type + */ + private RodBinding(Class type) { + this.type = type; + this.variableName = ""; // special value can never be found in RefMetaDataTracker + this.source = ""; + this.tags = new Tags(); + this.bound = false; } public String getVariableName() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index e4773bf4d..3dc974248 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -55,7 +55,7 @@ import static java.lang.Math.log10; */ @Requires(value={}) public class BeagleOutputToVCFWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 291ef7201..cc80eec43 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -53,7 +53,7 @@ import java.util.*; */ @Requires(value={}) public class ProduceBeagleInputWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index 3c221087a..43a021f31 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -58,7 +58,7 @@ import java.util.Set; */ @Requires(value={}) public class VariantsToBeagleUnphasedWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which BEAGLE unphased genotypes should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index fe58fb038..5117c1201 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -47,7 +47,7 @@ import java.util.Collection; @Reference(window=@Window(start=-1,stop=50)) @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { - @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=true) + @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) public RodBinding snpmask; private int deletionBasesRemaining = 0; @@ -61,24 +61,24 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { String refBase = String.valueOf((char)ref.getBase()); - Collection vcs = tracker.getValues(snpmask); - // Check to see if we have a called snp - for ( VariantContext vc : vcs ) { - if ( vc.isDeletion()) { - deletionBasesRemaining = vc.getReference().length(); - // delete the next n bases, not this one - return new Pair(context.getLocation(), refBase); - } else if ( vc.isInsertion()) { - return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); - } else if (vc.isSNP()) { - return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); + for ( VariantContext vc : tracker.getValues(VariantContext.class) ) { + if ( ! vc.getSource().equals(snpmask.getVariableName())) { + if ( vc.isDeletion()) { + deletionBasesRemaining = vc.getReference().length(); + // delete the next n bases, not this one + return new Pair(context.getLocation(), refBase); + } else if ( vc.isInsertion()) { + return new Pair(context.getLocation(), refBase.concat(vc.getAlternateAllele(0).toString())); + } else if (vc.isSNP()) { + return new Pair(context.getLocation(), vc.getAlternateAllele(0).toString()); + } } } // if we don't have a called site, and we have a mask at this site, mask it - for ( VariantContext vc : vcs ) { - if ( vc.getSource().startsWith("snpmask") && vc.isSNP()) { + for ( VariantContext vc : tracker.getValues(snpmask) ) { + if ( vc.isSNP()) { return new Pair(context.getLocation(), "N"); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 22c45df19..134aa0a59 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; @@ -51,7 +52,7 @@ import java.util.*; @Requires(value={}) @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltrationWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written", required=true) @@ -76,6 +77,8 @@ public class VariantFiltrationWalker extends RodWalker { protected Integer MASK_EXTEND = 0; @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) protected String MASK_NAME = "Mask"; + @Input(fullName="mask", doc="Input ROD mask", required=false) + public RodBinding mask; @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false) protected Boolean FAIL_MISSING_VALUES = false; @@ -154,7 +157,7 @@ public class VariantFiltrationWalker extends RodWalker { Collection VCs = tracker.getValues(variants, context.getLocation()); // is there a SNP mask present? - boolean hasMask = tracker.getValues("mask").size() > 0; + boolean hasMask = tracker.hasValues(mask); if ( hasMask ) previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 4f36bb6a3..8db957ed1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -48,7 +48,7 @@ import java.util.Set; @Reference(window=@Window(start=0,stop=100)) @Requires(value={}) public class FilterLiftedVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; private static final int MAX_VARIANT_SIZE = 100; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 38f6a2f39..787b86600 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -50,7 +50,7 @@ import java.util.*; @Reference(window=@Window(start=-200,stop=200)) @Requires(value={}) public class LeftAlignVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 4a4ab1eb6..7baa7193f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -53,7 +53,7 @@ import java.util.*; */ @Requires(value={}) public class LiftoverVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 8ccdef2d3..a16f06ced 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -48,7 +48,7 @@ import java.util.*; */ @Requires(value={}) public class RandomlySplitVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 1926d68a0..7e6c3c6b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -54,7 +54,7 @@ import java.util.*; */ @Requires(value={}) public class SelectVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written",required=true) @@ -81,15 +81,11 @@ public class SelectVariants extends RodWalker { @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; - @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track. Use -disc ROD_NAME", required=false) - private String discordanceRodName = ""; + @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track", required=false) + private RodBinding discordanceTrack = RodBinding.makeUnbound(VariantContext.class); - @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track. Use -conc ROD_NAME", required=false) - private String concordanceRodName = ""; - - @Hidden - @Argument(fullName="inputAF", shortName = "inputAF", doc="", required=false) - private String inputAFRodName = ""; + @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track", required=false) + private RodBinding concordanceTrack = RodBinding.makeUnbound(VariantContext.class); @Hidden @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) @@ -222,11 +218,11 @@ public class SelectVariants extends RodWalker { jexls = VariantContextUtils.initializeMatchExps(selectNames, SELECT_EXPRESSIONS); // Look at the parameters to decide which analysis to perform - DISCORDANCE_ONLY = discordanceRodName.length() > 0; - if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceRodName); + DISCORDANCE_ONLY = discordanceTrack.isBound(); + if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getVariableName()); - CONCORDANCE_ONLY = concordanceRodName.length() > 0; - if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceRodName); + CONCORDANCE_ONLY = concordanceTrack.isBound(); + if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getVariableName()); if (MENDELIAN_VIOLATIONS) { if ( FAMILY_STRUCTURE_FILE != null) { @@ -332,12 +328,12 @@ public class SelectVariants extends RodWalker { break; } if (DISCORDANCE_ONLY) { - Collection compVCs = tracker.getValues(VariantContext.class, discordanceRodName, context.getLocation()); + Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) return 0; } if (CONCORDANCE_ONLY) { - Collection compVCs = tracker.getValues(VariantContext.class, concordanceRodName, context.getLocation()); + Collection compVCs = tracker.getValues(concordanceTrack, context.getLocation()); if (!isConcordant(vc, compVCs)) return 0; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 775d749b6..625635c89 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -54,7 +54,7 @@ import java.util.Set; @Reference(window=@Window(start=0,stop=100)) @Requires(value={}) public class ValidateVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; public enum ValidationType { @@ -145,6 +145,8 @@ public class ValidateVariants extends RodWalker { for ( Object d : dbsnpList ) { if (d instanceof DbSNPFeature ) rsIDs.add(((DbSNPFeature)d).getRsID()); + else if (d instanceof VariantContext ) + rsIDs.add(((VariantContext)d).getID()); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 93f5c872f..63f7609fb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -49,7 +49,7 @@ import java.util.*; @Reference(window=@Window(start=0,stop=40)) @Requires(value={}) public class VariantValidationAssessor extends RodWalker,Integer> { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index fb6ccfb76..3bf615c94 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -47,7 +47,7 @@ import java.util.*; */ @Requires(value={}) public class VariantsToTable extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which results should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index b57606927..5853e6d44 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -64,7 +64,7 @@ public class VariantsToVCF extends RodWalker { protected VCFWriter baseWriter = null; private SortingVCFWriter vcfwriter; // needed because hapmap indel records move - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index 1e1c11ab8..85c5b3f73 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -91,6 +91,11 @@ public class RefMetaDataTrackerUnitTest { this.BValues = BValues == null ? null : makeRODRecord("B", BValues); } + @Override + public String toString() { + return String.format("A=%s, B=%s", AValues, BValues); + } + private final RODRecordList makeRODRecord(String name, List features) { List x = new ArrayList(); for ( Feature f : features ) @@ -135,12 +140,6 @@ public class RefMetaDataTrackerUnitTest { } } - private class MyTestAdaptors extends MyTest { - private MyTestAdaptors(final List AValues) { - super(MyTestAdaptors.class, AValues, null); - } - } - private final TableFeature makeSpan(int start, int stop) { return new TableFeature(genomeLocParser.createGenomeLoc("chr1", start, stop), Collections.emptyList(), Collections.emptyList()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index fef1b6e64..d6ff95539 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -37,10 +37,10 @@ public class BeagleIntegrationTest extends WalkerTest { public void testBeagleOutput() { WalkerTestSpec spec = new WalkerTestSpec( "-T BeagleOutputToVCF -R " + hg19Reference + " " + - "-B:variant,VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + - "-B:beagleR2,BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + - "-B:beagleProbs,BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + - "-B:beaglePhased,BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + + "--variants:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + + "--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + + "--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + + "--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4")); executeTest("test BeagleOutputToVCF", spec); } @@ -49,7 +49,7 @@ public class BeagleIntegrationTest extends WalkerTest { public void testBeagleInput() { WalkerTestSpec spec = new WalkerTestSpec( "-T ProduceBeagleInput -R " + hg19Reference + " " + - "-B:variant,VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + + "--variants:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + "-o %s", 1, Arrays.asList("a01c704246f3dd1b9c65774007e51e69")); executeTest("test BeagleInput", spec); } @@ -57,8 +57,8 @@ public class BeagleIntegrationTest extends WalkerTest { @Test public void testBeagleInput2() { WalkerTestSpec spec = new WalkerTestSpec( - "-T ProduceBeagleInput -B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ - "-B:validation,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ + "-T ProduceBeagleInput --variants:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ + "--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2, Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166")); executeTest("test BeagleInputWithBootstrap",spec); @@ -68,10 +68,10 @@ public class BeagleIntegrationTest extends WalkerTest { public void testBeagleOutput2() { WalkerTestSpec spec = new WalkerTestSpec( "-T BeagleOutputToVCF -R "+hg19Reference+" "+ - "-B:variant,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.vcf "+ - "-B:beagleR2,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ - "-B:beagleProbs,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ - "-B:beaglePhased,beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ + "--variants:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.vcf "+ + "--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ + "--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ + "--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965")); executeTest("testBeagleChangesSitesToRef",spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 3d74c5527..325d4a960 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -24,7 +24,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaReference", spec1b); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -B:snpmask,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("3a48986c3832a768b478c3e95f994b0f")); executeTest("testFastaAlternateReferenceIndels", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 7bec67d2e..85b7ea8e7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -15,7 +15,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test no action", spec); } @@ -23,7 +23,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -window 10 -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -window 10 --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("27b13f179bb4920615dff3a32730d845")); executeTest("test clustered SNPs", spec); } @@ -31,17 +31,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testMasks() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -mask foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("578f9e774784c25871678e6464fd212b")); executeTest("test mask all", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -mask foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); executeTest("test mask some", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( - baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -mask foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("5939f80d14b32d88587373532d7b90e5")); executeTest("test mask extend", spec3); } @@ -49,7 +49,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368")); executeTest("test filter #1", spec); } @@ -57,7 +57,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("c95845e817da7352b9b72bc9794f18fb")); executeTest("test filter #2", spec); } @@ -65,7 +65,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530")); executeTest("test filter with separate names #2", spec); } @@ -73,12 +73,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testGenotypeFilters() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("96b61e4543a73fe725e433f007260039")); executeTest("test genotype filter #1", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e")); executeTest("test genotype filter #2", spec2); } @@ -86,7 +86,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo -B:variant,VCF " + validationDataLocation + "twoDeletions.vcf", 1, + baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variants:VCF3 " + validationDataLocation + "twoDeletions.vcf", 1, Arrays.asList("569546fd798afa0e65c5b61b440d07ac")); executeTest("test deletions", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java index fc4e5ac66..1392f136a 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java @@ -56,7 +56,7 @@ public class DictionaryConsistencyIntegrationTest extends WalkerTest { } private WalkerTest.WalkerTestSpec testVCF(String ref, String vcf, Class c) { - return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 -B:two,vcf " + return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variants:vcf " + vcf + " -F POS,CHROM -R " + ref + " -o %s", 1, c); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index 82c894c6f..c3795f98e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testb36Tohg19() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + b36KGReference + " --variants:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); executeTest("test b36 to hg19", spec); @@ -47,7 +47,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testb36Tohg19UnsortedSamples() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " -B:variant,vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + b36KGReference + " --variants:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08")); executeTest("test b36 to hg19, unsorted samples", spec); @@ -56,7 +56,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testhg18Tohg19Unsorted() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + hg18Reference + " -B:variant,vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + hg18Reference + " --variants:vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b")); executeTest("test hg18 to hg19, unsorted", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index b5f41542e..8ae56d93d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' -B:variant,VCF3 " + testfile + " -NO_HEADER"), + baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variants:VCF3 " + testfile + " -NO_HEADER"), 1, Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); @@ -29,7 +29,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testfile = validationDataLocation + "test.dup.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -sn B -sn C -B:variant,VCF3 " + testfile + " -NO_HEADER"), + baseTestString(" -sn A -sn B -sn C --variants:VCF3 " + testfile + " -NO_HEADER"), 1, Arrays.asList("b74038779fe6485dbb8734ae48178356") ); @@ -42,7 +42,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -disc myvar -L 20:1012700-1020000 -B:variant,VCF " + b37hapmapGenotypes + " -B:myvar,VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); @@ -55,7 +55,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -conc hapmap -L 20:1012700-1020000 -B:hapmap,VCF " + b37hapmapGenotypes + " -B:variant,VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant:VCF " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index d7efe4212..113bd5491 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -56,7 +56,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants" + " -R " + b36KGReference + - " -B:variant,vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() + + " --variants:vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() + " --NO_HEADER" + " -o %s", 1, @@ -80,7 +80,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { WalkerTestSpec selectTestSpec = new WalkerTestSpec( "-T SelectVariants" + " -R " + b36KGReference + - " -B:variant,vcf3,storage=STREAM " + testFile + + " --variants:vcf3,storage=STREAM " + testFile + " --NO_HEADER" + " -select 'QD > 2.0'" + " -o " + tmpFifo.getAbsolutePath(), diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 7a4fb1c33..73a389ba6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -34,7 +34,7 @@ import java.util.Arrays; public class ValidateVariantsIntegrationTest extends WalkerTest { public static String baseTestString(String file, String type) { - return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 -B:variant,VCF " + validationDataLocation + file + " --validationType " + type; + return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variants:vcf " + validationDataLocation + file + " --validationType " + type; } @Test diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java index 1db712353..96cece3ca 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -35,7 +35,7 @@ import java.io.File; public class VariantsToTableIntegrationTest extends WalkerTest { private String variantsToTableCmd(String moreArgs) { return "-R " + hg18Reference + - " -B:eval,vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" + + " --variants:vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" + " -T VariantsToTable" + " -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" + " -L chr1 -KMA -o %s" + moreArgs; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index 8c96c1e11..51859df53 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -15,74 +15,73 @@ import java.util.ArrayList; * test(s) for the VariantsToVCF walker. */ public class VariantsToVCFIntegrationTest extends WalkerTest { - - - @Test - public void testVariantsToVCFUsingGeliInput() { - List md5 = new ArrayList(); - md5.add("4accae035d271b35ee2ec58f403c68c6"); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + - " -T VariantsToVCF" + - " -L 1:10,000,000-11,000,000" + - " -sample NA123AB" + - " -o %s" + - " -NO_HEADER", - 1, // just one output file - md5); - executeTest("testVariantsToVCFUsingGeliInput #1", spec).getFirst(); - } - - @Test - public void testGenotypesToVCFUsingGeliInput() { - List md5 = new ArrayList(); - md5.add("71e8c98d7c3a73b6287ecc339086fe03"); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + - " -T VariantsToVCF" + - " -L 1:10,000,000-11,000,000" + - " -sample NA123AB" + - " -o %s" + - " -NO_HEADER", - 1, // just one output file - md5); - executeTest("testVariantsToVCFUsingGeliInput #2", spec).getFirst(); - } - - @Test - public void testGenotypesToVCFUsingHapMapInput() { - List md5 = new ArrayList(); - md5.add("f343085305e80c7a2493422e4eaad983"); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -B:variant,HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" + - " -T VariantsToVCF" + - " -L 1:1-1,000,000" + - " -o %s" + - " -NO_HEADER", - 1, // just one output file - md5); - executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst(); - } - - @Test - public void testGenotypesToVCFUsingVCFInput() { - List md5 = new ArrayList(); - md5.add("86f02e2e764ba35854cff2aa05a1fdd8"); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -B:variant,VCF " + validationDataLocation + "complexExample.vcf4" + - " -T VariantsToVCF" + - " -o %s" + - " -NO_HEADER", - 1, // just one output file - md5); - executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst(); - } + // TODO -- eric, fix me +// @Test +// public void testVariantsToVCFUsingGeliInput() { +// List md5 = new ArrayList(); +// md5.add("4accae035d271b35ee2ec58f403c68c6"); +// +// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( +// "-R " + b36KGReference + +// " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + +// " -T VariantsToVCF" + +// " -L 1:10,000,000-11,000,000" + +// " -sample NA123AB" + +// " -o %s" + +// " -NO_HEADER", +// 1, // just one output file +// md5); +// executeTest("testVariantsToVCFUsingGeliInput #1", spec).getFirst(); +// } +// +// @Test +// public void testGenotypesToVCFUsingGeliInput() { +// List md5 = new ArrayList(); +// md5.add("71e8c98d7c3a73b6287ecc339086fe03"); +// +// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( +// "-R " + b36KGReference + +// " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + +// " -T VariantsToVCF" + +// " -L 1:10,000,000-11,000,000" + +// " -sample NA123AB" + +// " -o %s" + +// " -NO_HEADER", +// 1, // just one output file +// md5); +// executeTest("testVariantsToVCFUsingGeliInput #2", spec).getFirst(); +// } +// +// @Test +// public void testGenotypesToVCFUsingHapMapInput() { +// List md5 = new ArrayList(); +// md5.add("f343085305e80c7a2493422e4eaad983"); +// +// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( +// "-R " + b36KGReference + +// " -B:variant,HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" + +// " -T VariantsToVCF" + +// " -L 1:1-1,000,000" + +// " -o %s" + +// " -NO_HEADER", +// 1, // just one output file +// md5); +// executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst(); +// } +// +// @Test +// public void testGenotypesToVCFUsingVCFInput() { +// List md5 = new ArrayList(); +// md5.add("86f02e2e764ba35854cff2aa05a1fdd8"); +// +// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( +// "-R " + b36KGReference + +// " -B:variant,VCF " + validationDataLocation + "complexExample.vcf4" + +// " -T VariantsToVCF" + +// " -o %s" + +// " -NO_HEADER", +// 1, // just one output file +// md5); +// executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst(); +// } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 32ff25c7b..741e0bd17 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -17,11 +17,11 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; - String test1 = baseCommand + "-T VariantAnnotator -BTI variant -B:variant,vcf " + testVCF; + String test1 = baseCommand + "-T VariantAnnotator -BTI variant --variants:vcf " + testVCF; WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); - String test2 = baseCommand + "-T VariantsToVCF -B:variant,vcf " + result.get(0).getAbsolutePath(); + String test2 = baseCommand + "-T VariantsToVCF --variants:vcf " + result.get(0).getAbsolutePath(); WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); executeTest("Test Variants To VCF from new output", spec2); } From d3437e62da7cb302e85c36b296c3bffee9981a05 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 2 Aug 2011 21:59:06 -0400 Subject: [PATCH 083/186] Added a simple utility method Utils.optimumHashSize() to calculate the optimum initial size for a Java hash table (HashMap, HashSet, etc.) given an expected maximum number of elements. The optimum size is the smallest size that's guaranteed not to result in any rehash / table-resize operations. Example Usage: Map hash = new HashMap(Utils.optimumHashSize(expectedMaxElements)); I think we're paying way too heavy a price in unnecessary rehash operations across the GATK. If you don't specify an initial size, you get a table of size 16 that gets completely rehashed and doubles in size every time it becomes 75% full. This means you do at least twice as much work as you need to in order to populate your table: (n + n/2 + n/4 + ... 16 ~= (1 + 1/2 + 1/4...) * n ~= 2 * n --- .../src/org/broadinstitute/sting/utils/Utils.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index 6a50badce..015e5d6f6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -42,6 +42,21 @@ public class Utils { /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(Utils.class); + public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f; + + /** + * Calculates the optimum initial size for a hash table given the maximum number + * of elements it will need to hold. The optimum size is the smallest size that + * is guaranteed not to result in any rehash/table-resize operations. + * + * @param maxElements The maximum number of elements you expect the hash table + * will need to hold + * @return The optimum initial size for the table, given maxElements + */ + public static int optimumHashSize ( int maxElements ) { + return (int)(maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2; + } + public static String getClassName(Class c) { String FQClassName = c.getName(); int firstChar; From b5e843f8f0258dfc999e5ac818a0fa8d132cac22 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Aug 2011 22:00:06 -0400 Subject: [PATCH 084/186] Approaching the end for the new RodBinding system -- support for explicit naming of bindings (-X:name,type x) -- support for automatic naming of bindings in lists (-X:vcf foo.vcf -X:vcf bar.vcf will generate internal names X and X2) -- ParserEngineUnitTest expanded to cover all of the Rodbinding cases -- RodBindingUnitTest tests all of the low-level accessors -- Parsing engine throws UserExceptions when bad bindings are provided on the command line --- .../commandline/ArgumentTypeDescriptor.java | 17 +- .../sting/commandline/RodBinding.java | 50 +++-- .../gatk/refdata/RefMetaDataTracker.java | 15 +- .../sting/utils/text/ListFileUtils.java | 24 +-- .../commandline/ParsingEngineUnitTest.java | 186 +++++++++++++++++- .../sting/commandline/RodBindingUnitTest.java | 75 +++++++ .../gatk/EngineFeaturesIntegrationTest.java | 33 ++++ 7 files changed, 355 insertions(+), 45 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 134fe0e49..8685487ee 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -312,10 +312,23 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { + String name = source.field.getName(); + String tribbleType; Tags tags = getArgumentTags(matches); - Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, Tags.class); + // must have one or two tag values here + if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style + name = tags.getPositionalTags().get(0); + tribbleType = tags.getPositionalTags().get(1); + } else if ( tags.getPositionalTags().size() == 1 ) { // -X:type style + tribbleType = tags.getPositionalTags().get(0); + } else + throw new UserException.CommandLineException( + String.format("Unexpected number of positional tags for argument %s : %s. " + + "Rod bindings only suport -X:type and -X:name,type argument styles", + value, source.field.getName())); + Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); Class parameterType = getParameterizedTypeClass(type); - RodBinding result = (RodBinding)ctor.newInstance(parameterType, source.field.getName(), value, tags); + RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); parsingEngine.addTags(result,tags); return result; } catch (InvocationTargetException e) { diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index 5260fce3b..ab6b0ea06 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -25,13 +25,8 @@ package org.broadinstitute.sting.commandline; import org.broad.tribble.Feature; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.utils.GenomeLoc; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; +import java.util.*; /** * A RodBinding representing a walker argument that gets bound to a ROD track. @@ -39,24 +34,46 @@ import java.util.List; * There is no constraint on the type of the ROD bound. */ public class RodBinding { + protected final static String UNBOUND_VARIABLE_NAME = ""; + protected final static String UNBOUND_SOURCE = "UNBOUND"; + protected final static String UNBOUND_TRIBBLE_TYPE = null; public final static RodBinding makeUnbound(Class type) { return new RodBinding(type); } - final private String variableName; + final private String name; final private String source; + final private String tribbleType; final private Tags tags; final private Class type; final private boolean bound; + final private static Map nameCounter = new HashMap(); + + final protected static void resetNameCounter() { + nameCounter.clear(); + } + + final private static synchronized String countedVariableName(final String rawName) { + Integer count = nameCounter.get(rawName); + if ( count == null ) { + nameCounter.put(rawName, 1); + return rawName; + } else { + nameCounter.put(rawName, count + 1); + return rawName + (count + 1); + } + } + public boolean isBound() { return bound; } - public RodBinding(Class type, final String variableName, final String source, final Tags tags) { + public RodBinding(Class type, final String rawName, final String source, final String tribbleType, final Tags tags) { this.type = type; - this.variableName = variableName; + this.name = countedVariableName(rawName); this.source = source; + this.tribbleType = tribbleType; this.tags = tags; this.bound = true; } @@ -67,14 +84,15 @@ public class RodBinding { */ private RodBinding(Class type) { this.type = type; - this.variableName = ""; // special value can never be found in RefMetaDataTracker - this.source = ""; + this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker + this.source = UNBOUND_SOURCE; + this.tribbleType = UNBOUND_TRIBBLE_TYPE; this.tags = new Tags(); this.bound = false; } - public String getVariableName() { - return variableName; + public String getName() { + return name; } public Class getType() { return type; @@ -87,7 +105,11 @@ public class RodBinding { return tags; } + public String getTribbleType() { + return tribbleType; + } + public String toString() { - return String.format("(RodBinding name=%s source=%s)", getVariableName(), getSource()); + return String.format("(RodBinding name=%s source=%s)", getName(), getSource()); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index d37839eff..e1b9fedf6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -6,11 +6,8 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; -import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; @@ -109,7 +106,7 @@ public class RefMetaDataTracker { // ROD binding accessors // public List getValues(final RodBinding rodBinding) { - return getValues(rodBinding.getType(), rodBinding.getVariableName()); + return getValues(rodBinding.getType(), rodBinding.getName()); } public List getValues(final Collection> rodBindings) { @@ -120,7 +117,7 @@ public class RefMetaDataTracker { } public List getValues(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { - return getValues(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); + return getValues(rodBinding.getType(), rodBinding.getName(), onlyAtThisLoc); } public List getValues(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { @@ -131,10 +128,10 @@ public class RefMetaDataTracker { } public T getFirstValue(final RodBinding rodBinding) { - return getFirstValue(rodBinding.getType(), rodBinding.getVariableName()); + return getFirstValue(rodBinding.getType(), rodBinding.getName()); } public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { - return getFirstValue(rodBinding.getType(), rodBinding.getVariableName(), onlyAtThisLoc); + return getFirstValue(rodBinding.getType(), rodBinding.getName(), onlyAtThisLoc); } public T getFirstValue(final Collection> rodBindings) { @@ -158,11 +155,11 @@ public class RefMetaDataTracker { public boolean hasValues(final RodBinding rodBinding) { - return hasValues(rodBinding.getVariableName()); + return hasValues(rodBinding.getName()); } public List getValuesAsGATKFeatures(final RodBinding rodBinding) { - return getValuesAsGATKFeatures(rodBinding.getVariableName()); + return getValuesAsGATKFeatures(rodBinding.getName()); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 3175037f6..a0f6426d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -28,7 +28,6 @@ import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -140,34 +139,25 @@ public class ListFileUtils { for (RodBinding rodBinding: RODBindings) { String argValue = rodBinding.getSource(); String fileName = expandFileName(argValue); - final Tags tags = parser.getTags(rodBinding); - - List positionalTags = tags.getPositionalTags(); - if(positionalTags.size() != 1) - throw new UserException("Invalid syntax for RODBinding (reference-ordered data) input . " + - "Please use the following syntax when providing reference-ordered " + - "data: -: ."); - // Assume that if tags are present, those tags are name and type. - // Name is always first, followed by type. - String name = rodBinding.getVariableName(); - String type = positionalTags.get(0); + String name = rodBinding.getName(); + String type = rodBinding.getTribbleType(); RMDTriplet.RMDStorageType storageType = null; - if(tags.getValue("storage") != null) - storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,tags.getValue("storage")); + if(rodBinding.getTags().getValue("storage") != null) + storageType = Enum.valueOf(RMDTriplet.RMDStorageType.class,rodBinding.getTags().getValue("storage")); else if(fileName.toLowerCase().endsWith("stdin")) storageType = RMDTriplet.RMDStorageType.STREAM; else storageType = RMDTriplet.RMDStorageType.FILE; - RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,tags); + RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,rodBinding.getTags()); // validate triplet type Class typeFromTribble = builderForValidation.getFeatureCodecClass(triplet); if ( typeFromTribble != null && ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) - throw new UserException.BadArgumentValue(rodBinding.getVariableName(), + throw new UserException.BadArgumentValue(rodBinding.getName(), String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s", - rodBinding.getVariableName(), rodBinding.getType(), typeFromTribble)); + rodBinding.getName(), rodBinding.getType(), typeFromTribble)); rodBindings.add(triplet); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 54e3b35bc..88ab0268d 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -25,12 +25,16 @@ package org.broadinstitute.sting.commandline; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; +import javax.script.Bindings; import java.util.List; import java.util.EnumSet; /** @@ -42,6 +46,7 @@ public class ParsingEngineUnitTest extends BaseTest { @BeforeMethod public void setUp() { parsingEngine = new ParsingEngine(null); + RodBinding.resetNameCounter(); } private class InputFileArgProvider { @@ -62,7 +67,7 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.inputFile,"na12878.bam","Argument is not correctly initialized"); } - + @Test public void multiCharShortNameArgumentTest() { final String[] commandLine = new String[] {"-out","out.txt"}; @@ -211,7 +216,7 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.testEnum, TestEnum.ONE, "Enum value is not correct"); } - + @Test public void enumDefaultTest() { final String[] commandLine = new String[] {}; @@ -552,7 +557,7 @@ public class ParsingEngineUnitTest extends BaseTest { commandLine = new String[] {"--foo","5","--bar","6"}; parsingEngine.parse( commandLine ); - parsingEngine.validate(); + parsingEngine.validate(); } private class MutuallyExclusiveArgProvider { @@ -618,4 +623,179 @@ public class ParsingEngineUnitTest extends BaseTest { @ArgumentCollection RequiredArgProvider rap2 = new RequiredArgProvider(); } + + // -------------------------------------------------------------------------------- + // + // Tests of the RodBinding system + // + // -------------------------------------------------------------------------------- + + private class SingleRodBindingArgProvider { + @Input(shortName="V", required=false) + public RodBinding binding = RodBinding.makeUnbound(Feature.class); + } + + @Test + public void basicRodBindingArgumentTest() { + final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + + parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + SingleRodBindingArgProvider argProvider = new SingleRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + + @Test + public void unbasicRodBindingArgumentTest() { + final String[] commandLine = new String[] {}; + + parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + SingleRodBindingArgProvider argProvider = new SingleRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), RodBinding.UNBOUND_VARIABLE_NAME, "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), RodBinding.UNBOUND_SOURCE, "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.isBound(), false, "Bound() isn't returning its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 0, "Tags aren't correctly set"); + } + + @Test(expectedExceptions = UserException.class) + public void rodBindingArgumentTestMissingType() { + final String[] commandLine = new String[] {"-V","foo.vcf"}; + + parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + SingleRodBindingArgProvider argProvider = new SingleRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject(argProvider); + } + + @Test(expectedExceptions = UserException.class) + public void rodBindingArgumentTestTooManyTags() { + final String[] commandLine = new String[] {"-V:x,y,z","foo.vcf"}; + + parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + SingleRodBindingArgProvider argProvider = new SingleRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject(argProvider); + } + + private class VariantContextRodBindingArgProvider { + @Input(shortName="V") + public RodBinding binding; + } + + @Test + public void variantContextBindingArgumentTest() { + final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + + private class ListRodBindingArgProvider { + @Input(shortName="V", required=false) + public List> bindings; + } + + @Test + public void listRodBindingArgumentTest() { + final String[] commandLine = new String[] {"-V:vcf","foo.vcf"}; + + parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + ListRodBindingArgProvider argProvider = new ListRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.bindings.size(), 1, "Unexpected number of bindings"); + RodBinding binding = argProvider.bindings.get(0); + Assert.assertEquals(binding.getName(), "bindings", "Name isn't set properly"); + Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); + Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + + @Test + public void listRodBindingArgumentTest2Args() { + final String[] commandLine = new String[] {"-V:vcf","foo.vcf", "-V:vcf", "bar.vcf"}; + + parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + ListRodBindingArgProvider argProvider = new ListRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.bindings.size(), 2, "Unexpected number of bindings"); + + RodBinding binding = argProvider.bindings.get(0); + Assert.assertEquals(binding.getName(), "bindings", "Name isn't set properly"); + Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); + Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + + RodBinding binding2 = argProvider.bindings.get(1); + Assert.assertEquals(binding2.getName(), "bindings2", "Name isn't set properly"); + Assert.assertEquals(binding2.getSource(), "bar.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(binding2.getType(), Feature.class, "Type isn't set to its expected value"); + Assert.assertEquals(binding2.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + + @Test + public void listRodBindingArgumentTest0Args() { + final String[] commandLine = new String[] {}; + + parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + ListRodBindingArgProvider argProvider = new ListRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertNull(argProvider.bindings, "Bindings were not null"); + } + + @Test + public void listRodBindingArgumentTestExplicitlyNamed() { + final String[] commandLine = new String[] {"-V:foo,vcf","foo.vcf", "-V:foo,vcf", "bar.vcf"}; + + parsingEngine.addArgumentSource( ListRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + ListRodBindingArgProvider argProvider = new ListRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.bindings.size(), 2, "Unexpected number of bindings"); + Assert.assertEquals(argProvider.bindings.get(0).getName(), "foo", "Name isn't set properly"); + Assert.assertEquals(argProvider.bindings.get(1).getName(), "foo2", "Name isn't set properly"); + } + + } diff --git a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java new file mode 100644 index 000000000..a32157a41 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + * Test suite for the parsing engine. + */ +public class RodBindingUnitTest extends BaseTest { + Tags mytags = new Tags(); + + @Test + public void testStandardRodBinding() { + RodBinding b = new RodBinding(VariantContext.class, "b", "foo", "vcf", mytags); + Assert.assertEquals(b.getName(), "b"); + Assert.assertEquals(b.getType(), VariantContext.class); + Assert.assertEquals(b.getSource(), "foo"); + Assert.assertEquals(b.getTribbleType(), "vcf"); + Assert.assertEquals(b.isBound(), true); + } + + @Test + public void testUnboundRodBinding() { + RodBinding u = RodBinding.makeUnbound(VariantContext.class); + Assert.assertEquals(u.getName(), RodBinding.UNBOUND_VARIABLE_NAME); + Assert.assertEquals(u.getSource(), RodBinding.UNBOUND_SOURCE); + Assert.assertEquals(u.getType(), VariantContext.class); + Assert.assertEquals(u.getTribbleType(), RodBinding.UNBOUND_TRIBBLE_TYPE); + Assert.assertEquals(u.isBound(), false); + } + + @Test + public void testMultipleBindings() { + String name = "binding"; + RodBinding b1 = new RodBinding(VariantContext.class, name, "foo", "vcf", mytags); + Assert.assertEquals(b1.getName(), name); + Assert.assertEquals(b1.getType(), VariantContext.class); + Assert.assertEquals(b1.getSource(), "foo"); + Assert.assertEquals(b1.getTribbleType(), "vcf"); + Assert.assertEquals(b1.isBound(), true); + + RodBinding b2 = new RodBinding(VariantContext.class, name, "foo", "vcf", mytags); + Assert.assertEquals(b2.getName(), name + "2"); + Assert.assertEquals(b2.getType(), VariantContext.class); + Assert.assertEquals(b2.getSource(), "foo"); + Assert.assertEquals(b2.getTribbleType(), "vcf"); + Assert.assertEquals(b2.isBound(), true); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 2bdddafe3..31ba9269a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -24,10 +24,20 @@ package org.broadinstitute.sting.gatk; +import org.broad.tribble.Feature; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.Assert; import org.testng.annotations.Test; +import java.util.List; + /** * */ @@ -56,3 +66,26 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker"); } } + +//class TestRodBindings extends RodWalker { +// @Input(fullName="req", required=true) +// public RodBinding required; +// +// @Input(fullName="optional", required=false) +// public RodBinding optional = RodBinding.makeUnbound(Feature.class); +// +// @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) +// public List> variantsList; +// +// public void initialize() { +// // bound values +// Assert.assertEquals(required.isBound(), true); +// +// +// System.exit(0); +// } +// +// public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; } +// public Integer reduceInit() { return 0; } +// public Integer reduce(Integer counter, Integer sum) { return counter + sum; } +//} \ No newline at end of file From 2874835997e3b928144f5d27b126bf807a608de4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 2 Aug 2011 22:25:41 -0400 Subject: [PATCH 085/186] Bug fix for type checking RodBindings Now compares the feature class not the codec class. UnitTests improvements integrationtests on their way to actually running --- .../gatk/refdata/tracks/RMDTrackBuilder.java | 4 ++++ .../sting/gatk/walkers/PileupWalker.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 15 +++++++-------- .../walkers/beagle/BeagleOutputToVCFWalker.java | 5 ++--- .../beagle/ProduceBeagleInputWalker.java | 5 ++--- .../beagle/VariantsToBeagleUnphasedWalker.java | 3 +-- .../fasta/FastaAlternateReferenceWalker.java | 6 ++---- .../filters/VariantFiltrationWalker.java | 2 +- .../variantutils/FilterLiftedVariants.java | 6 +++--- .../walkers/variantutils/LeftAlignVariants.java | 6 +++--- .../walkers/variantutils/LiftoverVariants.java | 9 ++++----- .../variantutils/RandomlySplitVariants.java | 3 +-- .../walkers/variantutils/SelectVariants.java | 7 +++---- ...bleNewRodStyle.java => TestRodBindings.java} | 2 +- .../variantutils/VariantValidationAssessor.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 12 ++++++------ .../sting/utils/text/ListFileUtils.java | 2 +- .../commandline/ParsingEngineUnitTest.java | 17 +++++++++++++++++ .../SelectVariantsIntegrationTest.java | 4 ++-- 19 files changed, 62 insertions(+), 50 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/{VariantsToTableNewRodStyle.java => TestRodBindings.java} (99%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index a775a82d2..248c454d7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -142,6 +142,10 @@ public class RMDTrackBuilder extends PluginManager { return getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); } + public Class getFeatureClass(RMDTriplet fileDescriptor) { + return getAvailableTrackNamesAndRecordTypes().get(fileDescriptor.getType().toUpperCase()); + } + /** * create a RMDTrack of the specified type * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index fd9bf5734..bd661389c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -70,7 +70,7 @@ public class PileupWalker extends LocusWalker implements TreeR @Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events") public boolean SHOW_INDEL_PILEUPS = false; - @Argument(fullName="rodBind",shortName="-B",doc="Add these ROD bindings to the output Pileup", required=false) + @Argument(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) public List> rods; public void initialize() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index fdad1485a..59d79ebf2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -49,11 +47,13 @@ import java.util.*; /** * Annotates variant calls with context information. Users can specify which of the available annotations to use. */ -@Requires(value={},referenceMetaData=@RMD(name="variant",type=VariantContext.class)) +@Requires(value={}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) public class VariantAnnotator extends RodWalker { + @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -118,8 +118,7 @@ public class VariantAnnotator extends RodWalker { listAnnotationsAndExit(); // get the list of all sample names from the variant VCF input rod, if applicable - Set rodName = new HashSet(); - rodName.add("variant"); + List rodName = Arrays.asList(variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); // add the non-VCF sample from the command-line, if applicable @@ -143,7 +142,7 @@ public class VariantAnnotator extends RodWalker { // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet(); hInfo.addAll(engine.getVCFAnnotationDescriptions()); - for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")) ) { + for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())) ) { if ( isUniqueHeaderLine(line, hInfo) ) hInfo.add(line); } @@ -202,7 +201,7 @@ public class VariantAnnotator extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(VariantContext.class, "variant", context.getLocation()); + Collection VCs = tracker.getValues(variants, context.getLocation()); if ( VCs.size() == 0 ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 3dc974248..a45d89b19 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -107,7 +106,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { final List dataSources = this.getToolkit().getRodDataSources(); for( final ReferenceOrderedDataSource source : dataSources ) { - if (source.getName().equals(comp.getVariableName())) { + if (source.getName().equals(comp.getName())) { hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); @@ -116,7 +115,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index cc80eec43..e4b2dbfee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; @@ -99,7 +98,7 @@ public class ProduceBeagleInputWalker extends RodWalker { public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); beagleWriter.print("marker alleleA alleleB"); for ( String sample : samples ) @@ -303,7 +302,7 @@ public class ProduceBeagleInputWalker extends RodWalker { } private void initializeVcfWriter() { - final List inputNames = Arrays.asList(validation.getVariableName()); + final List inputNames = Arrays.asList(validation.getName()); // setup the header fields Set hInfo = new HashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index 43a021f31..93f62f85d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; @@ -78,7 +77,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker private int testSetSize = 0; public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); beagleWriter.print("I marker alleleA alleleB"); for ( String sample : samples ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 5117c1201..1f214fa62 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -35,8 +35,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Collection; - /** * Generates an alternative reference sequence over the specified interval. Given variant ROD tracks, @@ -48,7 +46,7 @@ import java.util.Collection; @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) - public RodBinding snpmask; + public RodBinding snpmask = RodBinding.makeUnbound(VariantContext.class); private int deletionBasesRemaining = 0; @@ -63,7 +61,7 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker { // Check to see if we have a called snp for ( VariantContext vc : tracker.getValues(VariantContext.class) ) { - if ( ! vc.getSource().equals(snpmask.getVariableName())) { + if ( ! vc.getSource().equals(snpmask.getName())) { if ( vc.isDeletion()) { deletionBasesRemaining = vc.getReference().length(); // delete the next n bases, not this one diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 134aa0a59..4eaed4840 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -98,7 +98,7 @@ public class VariantFiltrationWalker extends RodWalker { private void initializeVcfWriter() { - final List inputNames = Arrays.asList(variants.getVariableName()); + final List inputNames = Arrays.asList(variants.getName()); // setup the header fields Set hInfo = new HashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 8db957ed1..2f6c7d99c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -59,10 +59,10 @@ public class FilterLiftedVariants extends RodWalker { private long failedLocs = 0, totalLocs = 0; public void initialize() { - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getVariableName())); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(variants.getVariableName()) ? vcfHeaders.get(variants.getVariableName()).getMetaData() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(variants.getName()) ? vcfHeaders.get(variants.getName()).getMetaData() : null, samples); writer.writeHeader(vcfHeader); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 787b86600..5b83ae688 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -59,10 +59,10 @@ public class LeftAlignVariants extends RodWalker { private SortingVCFWriter writer; public void initialize() { - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getVariableName())); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); - Set headerLines = vcfHeaders.get(variants.getVariableName()).getMetaData(); + Set headerLines = vcfHeaders.get(variants.getName()).getMetaData(); baseWriter.writeHeader(new VCFHeader(headerLines, samples)); writer = new SortingVCFWriter(baseWriter, 200); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 7baa7193f..eeb85d02d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -36,7 +36,6 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; @@ -89,12 +88,12 @@ public class LiftoverVariants extends RodWalker { throw new UserException.BadInput("the chain file you are using is not compatible with the reference you are trying to lift over to; please use the appropriate chain file for the given reference"); } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getVariableName())); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); Set metaData = new HashSet(); - if ( vcfHeaders.containsKey(variants.getVariableName()) ) - metaData.addAll(vcfHeaders.get(variants.getVariableName()).getMetaData()); + if ( vcfHeaders.containsKey(variants.getName()) ) + metaData.addAll(vcfHeaders.get(variants.getName()).getMetaData()); if ( RECORD_ORIGINAL_LOCATION ) { metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record")); metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index a16f06ced..ddbb1ed56 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; @@ -73,7 +72,7 @@ public class RandomlySplitVariants extends RodWalker { iFraction = (int)(fraction * 1000.0); // setup the header info - final List inputNames = Arrays.asList(variants.getVariableName()); + final List inputNames = Arrays.asList(variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames); Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 7e6c3c6b8..893edec4e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; @@ -176,7 +175,7 @@ public class SelectVariants extends RodWalker { */ public void initialize() { // Get list of samples to include in the output - List rodNames = Arrays.asList(variants.getVariableName()); + List rodNames = Arrays.asList(variants.getName()); Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); @@ -219,10 +218,10 @@ public class SelectVariants extends RodWalker { // Look at the parameters to decide which analysis to perform DISCORDANCE_ONLY = discordanceTrack.isBound(); - if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getVariableName()); + if (DISCORDANCE_ONLY) logger.info("Selecting only variants discordant with the track: " + discordanceTrack.getName()); CONCORDANCE_ONLY = concordanceTrack.isBound(); - if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getVariableName()); + if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (MENDELIAN_VIOLATIONS) { if ( FAMILY_STRUCTURE_FILE != null) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java index b8c6fd530..9ca7b89a6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableNewRodStyle.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java @@ -42,7 +42,7 @@ import java.util.*; /** * Emits specific fields as dictated by the user from one or more VCF files. */ -public class VariantsToTableNewRodStyle extends RodWalker { +public class TestRodBindings extends RodWalker { @Output(doc="File to which results should be written",required=true) protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 63f7609fb..cb8be9c17 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -115,7 +115,7 @@ public class VariantValidationAssessor extends RodWalker inputNames = Arrays.asList(variants.getVariableName()); + final List inputNames = Arrays.asList(variants.getName()); // setup the header fields Set hInfo = new HashSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 5853e6d44..59cf365c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -101,8 +101,8 @@ public class VariantsToVCF extends RodWalker { } // set the appropriate sample name if necessary - if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getVariableName()) ) { - Genotype g = Genotype.modifyName(vc.getGenotype(variants.getVariableName()), sampleName); + if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { + Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName); Map genotypes = new HashMap(); genotypes.put(sampleName, g); vc = VariantContext.modifyGenotypes(vc, genotypes); @@ -117,7 +117,7 @@ public class VariantsToVCF extends RodWalker { private Collection getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) { // we need to special case the HapMap format because indels aren't handled correctly - List features = tracker.getValues(variants.getVariableName()); + List features = tracker.getValues(variants.getName()); if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) { ArrayList hapmapVCs = new ArrayList(features.size()); for ( Object feature : features ) { @@ -151,7 +151,7 @@ public class VariantsToVCF extends RodWalker { } refBase = ref.getBases()[hapmap.getStart() - ref.getWindow().getStart()]; } - VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getVariableName(), hapmap, ref); + VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), hapmap, ref); if ( vc != null ) { if ( refBase != null ) { Map attrs = new HashMap(vc.getAttributes()); @@ -219,10 +219,10 @@ public class VariantsToVCF extends RodWalker { samples.add(sampleName); } else { // try VCF first - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getVariableName())); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if ( samples.isEmpty() ) { - List rods = tracker.getValues(variants.getVariableName()); + List rods = tracker.getValues(variants.getName()); if ( rods.size() == 0 ) throw new IllegalStateException("No rod data is present"); diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index a0f6426d8..97c7c1714 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -153,7 +153,7 @@ public class ListFileUtils { RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,rodBinding.getTags()); // validate triplet type - Class typeFromTribble = builderForValidation.getFeatureCodecClass(triplet); + Class typeFromTribble = builderForValidation.getFeatureClass(triplet); if ( typeFromTribble != null && ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) throw new UserException.BadArgumentValue(rodBinding.getName(), String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s", diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 88ab0268d..ddd07106c 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -717,6 +717,23 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } + @Test + public void variantContextBindingArgumentTestVCF3() { + final String[] commandLine = new String[] {"-V:vcf3","foo.vcf"}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + private class ListRodBindingArgProvider { @Input(shortName="V", required=false) public List> bindings; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 8ae56d93d..7e83e11a6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -42,7 +42,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variants:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); @@ -55,7 +55,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant:VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variants:VCF " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); From 5dcac7b0643cb03a4666d220441bfb1d8692b2ee Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Wed, 3 Aug 2011 00:24:47 -0400 Subject: [PATCH 086/186] GATKReport v0.2: - Floating point column widths are measured correctly - Using fixed width columns instead of white space separated which allows spaces embedded in cell values - Legacy support for parsing white space separated v0.1 tables where the columns may not be fixed width - Enforcing that table descriptions do not contain newlines so that tables can be parsed correctly Replaced GATKReportTableParser with existing functionality in GATKReport --- public/R/src/gsalib/R/gsa.read.gatkreport.R | 45 ++++++- .../sting/gatk/report/GATKReport.java | 53 ++++++-- .../sting/gatk/report/GATKReportColumn.java | 35 +++++- .../sting/gatk/report/GATKReportColumns.java} | 40 +++--- .../sting/gatk/report/GATKReportParser.java | 83 ------------- .../sting/gatk/report/GATKReportTable.java | 115 +++++++++++++++--- .../gatk/report/GATKReportTableParser.java | 75 ------------ .../sting/gatk/report/GATKReportVersion.java | 70 +++++++++++ .../gatk/walkers/diffengine/DiffEngine.java | 2 +- .../sting/utils/text/TextFormattingUtils.java | 53 ++++++++ .../sting/gatk/report/GATKReportUnitTest.java | 55 +++++++++ .../DiffObjectsIntegrationTest.java | 6 +- .../VariantEvalIntegrationTest.java | 42 +++---- .../VCFStreamingIntegrationTest.java | 2 +- .../text/TextFormattingUtilsUnitTest.java | 88 ++++++++++++++ .../sting/queue/pipeline/PipelineTest.scala | 15 +-- 16 files changed, 532 insertions(+), 247 deletions(-) rename public/java/{test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java => src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java} (50%) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java diff --git a/public/R/src/gsalib/R/gsa.read.gatkreport.R b/public/R/src/gsalib/R/gsa.read.gatkreport.R index 9b3ef1ad1..011b5240d 100644 --- a/public/R/src/gsalib/R/gsa.read.gatkreport.R +++ b/public/R/src/gsalib/R/gsa.read.gatkreport.R @@ -20,6 +20,20 @@ assign(tableName, d, envir=tableEnv); } +# Read a fixed width line of text into a list. +.gsa.splitFixedWidth <- function(line, columnStarts) { + splitStartStop <- function(x) { + x = substring(x, starts, stops); + x = gsub("^[[:space:]]+|[[:space:]]+$", "", x); + x; + } + + starts = c(1, columnStarts); + stops = c(columnStarts - 1, nchar(line)); + + sapply(line, splitStartStop)[,1]; +} + # Load all GATKReport tables from a file gsa.read.gatkreport <- function(filename) { con = file(filename, "r", blocking = TRUE); @@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) { tableName = NA; tableHeader = c(); tableRows = c(); + version = NA; for (line in lines) { - if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { + if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) { headerFields = unlist(strsplit(line, "[[:space:]]+")); if (!is.na(tableName)) { @@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) { tableName = headerFields[2]; tableHeader = c(); tableRows = c(); + + # For differences in versions see + # $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java + if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { + version = "v0.1"; + + } else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) { + version = "v0.2"; + columnStarts = c(); + + } + } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { # do nothing } else if (!is.na(tableName)) { - row = unlist(strsplit(line, "[[:space:]]+")); + + if (version == "v0.1") { + row = unlist(strsplit(line, "[[:space:]]+")); + + } else if (version == "v0.2") { + if (length(tableHeader) == 0) { + headerChars = unlist(strsplit(line, "")); + # Find the first position of non space characters, excluding the first character + columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1); + } + + row = .gsa.splitFixedWidth(line, columnStarts); + } if (length(tableHeader) == 0) { - tableHeader = row; + tableHeader = row; } else { tableRows = rbind(tableRows, row); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 59d496828..dc3a617e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -1,21 +1,23 @@ package org.broadinstitute.sting.gatk.report; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; +import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.io.*; +import java.util.List; import java.util.TreeMap; /** * Container class for GATK report tables */ public class GATKReport { - private TreeMap tables; + private TreeMap tables = new TreeMap(); /** * Create a new, empty GATKReport. */ public GATKReport() { - tables = new TreeMap(); } /** @@ -23,7 +25,7 @@ public class GATKReport { * @param filename the path to the file to load */ public GATKReport(String filename) { - loadReport(new File(filename)); + this(new File(filename)); } /** @@ -31,7 +33,6 @@ public class GATKReport { * @param file the file to load */ public GATKReport(File file) { - tables = new TreeMap(); loadReport(file); } @@ -46,11 +47,17 @@ public class GATKReport { GATKReportTable table = null; String[] header = null; int id = 0; + GATKReportVersion version = null; + List columnStarts = null; String line; while ( (line = reader.readLine()) != null ) { - if (line.startsWith("##:GATKReport.v0.1 ")) { - line = line.replaceFirst("##:GATKReport.v0.1 ", ""); + + if (line.startsWith("##:GATKReport.v")) { + + version = GATKReportVersion.fromHeader(line); + + line = line.replaceFirst("##:GATKReport." + version.versionString + " ", ""); String[] pieces = line.split(" : "); String tableName = pieces[0]; @@ -58,14 +65,35 @@ public class GATKReport { addTable(tableName, tableDesc); table = getTable(tableName); + table.setVersion(version); header = null; - } else if ( line.isEmpty() ) { + columnStarts = null; + } else if ( line.trim().isEmpty() ) { // do nothing } else { if (table != null) { + + String[] splitLine; + + switch (version) { + case V0_1: + splitLine = TextFormattingUtils.splitWhiteSpace(line); + break; + + case V0_2: + if (header == null) { + columnStarts = TextFormattingUtils.getWordStarts(line); + } + splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts); + break; + + default: + throw new ReviewedStingException("GATK report version parsing not implemented for: " + line); + } + if (header == null) { - header = line.split("\\s+"); + header = splitLine; table.addPrimaryKey("id", false); @@ -75,10 +103,8 @@ public class GATKReport { id = 0; } else { - String[] entries = line.split("\\s+"); - for (int columnIndex = 0; columnIndex < header.length; columnIndex++) { - table.set(id, header[columnIndex], entries[columnIndex]); + table.set(id, header[columnIndex], splitLine[columnIndex]); } id++; @@ -125,7 +151,10 @@ public class GATKReport { * @return the table object */ public GATKReportTable getTable(String tableName) { - return tables.get(tableName); + GATKReportTable table = tables.get(tableName); + if (table == null) + throw new ReviewedStingException("Table is not in GATKReport: " + tableName); + return table; } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 440597754..1c46b3bac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap { * tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero * values) in the table. * - * @param primaryKey the primary key position in the column that should be set + * @param primaryKey the primary key position in the column that should be retrieved * @return the value at the specified position in the column, or the default value if the element is not set */ - public Object getWithoutSideEffects(Object primaryKey) { + private Object getWithoutSideEffects(Object primaryKey) { if (!this.containsKey(primaryKey)) { return defaultValue; } @@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap { return this.get(primaryKey); } + /** + * Return an object from the column, but if it doesn't exist, return the default value. + * + * @param primaryKey the primary key position in the column that should be retrieved + * @return the string value at the specified position in the column, or the default value if the element is not set + */ + public String getStringValue(Object primaryKey) { + return toString(getWithoutSideEffects(primaryKey)); + } + /** * Return the displayable property of the column. If true, the column will be displayed in the final output. * If not, printing will be suppressed for the contents of the table. @@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap { for (Object obj : this.values()) { if (obj != null) { - int width = obj.toString().length(); + int width = toString(obj).length(); if (width > maxWidth) { maxWidth = width; @@ -77,4 +87,23 @@ public class GATKReportColumn extends TreeMap { return maxWidth; } + + /** + * Returns a string version of the values. + * @param obj The object to convert to a string + * @return The string representation of the column + */ + private static String toString(Object obj) { + String value; + if (obj == null) { + value = "null"; + } else if (obj instanceof Float) { + value = String.format("%.8f", (Float) obj); + } else if (obj instanceof Double) { + value = String.format("%.8f", (Double) obj); + } else { + value = obj.toString(); + } + return value; + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java similarity index 50% rename from public/java/test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java rename to public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java index cfd75c41a..a33631c85 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportParserUnitTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java @@ -24,26 +24,32 @@ package org.broadinstitute.sting.gatk.report; -import org.broadinstitute.sting.BaseTest; -import org.testng.Assert; -import org.testng.annotations.Test; +import java.util.*; -import java.io.File; +/** + * Tracks a linked list of GATKReportColumn in order by name. + */ +public class GATKReportColumns extends LinkedHashMap { + private List columnNames = new ArrayList(); -public class GATKReportParserUnitTest extends BaseTest { - @Test - public void testParse() throws Exception { - GATKReportParser parser = new GATKReportParser(); - parser.parse(new File(validationDataLocation + "exampleGATKReport.eval")); + /** + * Returns the column by index + * @param i the index + * @return The column + */ + public GATKReportColumn getByIndex(int i) { + return get(columnNames.get(i)); + } - Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nProcessedLoci"), "100000"); - Assert.assertEquals(parser.getValue("CountVariants", "none.eval.none.all", "nNoCalls"), "99872"); + @Override + public GATKReportColumn remove(Object key) { + columnNames.remove(key); + return super.remove(key); + } - Assert.assertEquals(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC"), "2"); - Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2.bad", "AC")); - Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics", "none.eval.none.novel.ac2", "AC.bad")); - Assert.assertNull(parser.getValue("SimpleMetricsByAC.metrics.bad", "none.eval.none.novel.ac2", "AC")); - - Assert.assertEquals(parser.getValue("ValidationReport", "none.eval.none.known", "sensitivity"), "NaN"); + @Override + public GATKReportColumn put(String key, GATKReportColumn value) { + columnNames.add(key); + return super.put(key, value); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java deleted file mode 100644 index 6915d5cb2..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportParser.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.report; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.broadinstitute.sting.utils.text.XReadLines; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -public class GATKReportParser { - private List tables = new ArrayList(); - - public void parse(File file) throws IOException { - InputStream stream = FileUtils.openInputStream(file); - try { - parse(stream); - } finally { - IOUtils.closeQuietly(stream); - } - } - - public void parse(InputStream input) throws IOException { - GATKReportTableParser table = null; - - for (String line: new XReadLines(input)) { - if (line.startsWith("##:GATKReport.v0.1 ")) { - table = newTableParser(line); - tables.add(table); - table.parse(line); - } else if (table != null) { - if (line.trim().length() == 0) - table = null; - else - table.parse(line); - } - } - } - - public String getValue(String tableName, String[] key, String column) { - for (GATKReportTableParser table: tables) - if (table.getTableName().equals(tableName)) - return table.getValue(key, column); - return null; - } - - public String getValue(String tableName, String key, String column) { - for (GATKReportTableParser table: tables) - if (table.getTableName().equals(tableName)) - return table.getValue(key, column); - return null; - } - - private GATKReportTableParser newTableParser(String header) { - return new GATKReportTableParser(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index f7ea25696..5d38295f5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.report; +import org.apache.commons.lang.ObjectUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.PrintStream; @@ -88,17 +89,20 @@ import java.util.regex.Pattern; * but at least the prototype contained herein works. * * @author Kiran Garimella + * @author Khalid Shakir */ public class GATKReportTable { + private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2; private String tableName; private String tableDescription; + private GATKReportVersion version = LATEST_REPORT_VERSION; private String primaryKeyName; private Collection primaryKeyColumn; private boolean primaryKeyDisplay; - boolean sortByPrimaryKey = true; + private boolean sortByPrimaryKey = true; - private LinkedHashMap columns; + private GATKReportColumns columns; /** * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed @@ -113,6 +117,19 @@ public class GATKReportTable { return !m.find(); } + /** + * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed + * + * @param description the name of the table or column + * @return true if the name is valid, false if otherwise + */ + private boolean isValidDescription(String description) { + Pattern p = Pattern.compile("\\r|\\n"); + Matcher m = p.matcher(description); + + return !m.find(); + } + /** * Construct a new GATK report table with the specified name and description * @@ -128,11 +145,23 @@ public class GATKReportTable { throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); } + if (!isValidDescription(tableDescription)) { + throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines."); + } + this.tableName = tableName; this.tableDescription = tableDescription; this.sortByPrimaryKey = sortByPrimaryKey; - columns = new LinkedHashMap(); + columns = new GATKReportColumns(); + } + + public GATKReportVersion getVersion() { + return version; + } + + protected void setVersion(GATKReportVersion version) { + this.version = version; } /** @@ -161,6 +190,57 @@ public class GATKReportTable { primaryKeyDisplay = display; } + /** + * Returns the first primary key matching the dotted column values. + * Ex: dbsnp.eval.called.all.novel.all + * @param dottedColumnValues Period concatenated values. + * @return The first primary key matching the column values or throws an exception. + */ + public Object getPrimaryKey(String dottedColumnValues) { + Object key = findPrimaryKey(dottedColumnValues); + if (key == null) + throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); + return key; + } + + /** + * Returns true if there is at least on row with the dotted column values. + * Ex: dbsnp.eval.called.all.novel.all + * @param dottedColumnValues Period concatenated values. + * @return true if there is at least one row matching the columns. + */ + public boolean containsPrimaryKey(String dottedColumnValues) { + return findPrimaryKey(dottedColumnValues) != null; + } + + /** + * Returns the first primary key matching the dotted column values. + * Ex: dbsnp.eval.called.all.novel.all + * @param dottedColumnValues Period concatenated values. + * @return The first primary key matching the column values or null. + */ + private Object findPrimaryKey(String dottedColumnValues) { + return findPrimaryKey(dottedColumnValues.split("\\.")); + } + + /** + * Returns the first primary key matching the column values. + * Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" } + * @param columnValues column values. + * @return The first primary key matching the column values. + */ + private Object findPrimaryKey(Object[] columnValues) { + for (Object primaryKey : primaryKeyColumn) { + boolean matching = true; + for (int i = 0; matching && i < columnValues.length; i++) { + matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1)); + } + if (matching) + return primaryKey; + } + return null; + } + /** * Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set. * @@ -230,6 +310,17 @@ public class GATKReportTable { return columns.get(columnName).get(primaryKey); } + /** + * Get a value from the given position in the table + * + * @param primaryKey the primary key value + * @param columnIndex the index of the column + * @return the value stored at the specified position in the table + */ + private Object get(Object primaryKey, int columnIndex) { + return columns.getByIndex(columnIndex).get(primaryKey); + } + /** * Increment an element in the table. This implementation is awful - a functor would probably be better. * @@ -515,7 +606,7 @@ public class GATKReportTable { String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s"; // Emit the table definition - out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription); + out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription); // Emit the table header, taking into account the padding requirement if the primary key is a hidden column boolean needsPadding = false; @@ -545,22 +636,8 @@ public class GATKReportTable { for (String columnName : columns.keySet()) { if (columns.get(columnName).isDisplayable()) { - Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey); - if (needsPadding) { out.printf(" "); } - - String value = "null"; - if (obj != null) { - if (obj instanceof Float) { - value = String.format("%.8f", (Float) obj); - } else if (obj instanceof Double) { - value = String.format("%.8f", (Double) obj); - } else { - value = obj.toString(); - } - } - - //out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString()); + String value = columns.get(columnName).getStringValue(primaryKey); out.printf(columnWidths.get(columnName), value); needsPadding = true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java deleted file mode 100644 index 6fd9f9627..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTableParser.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.report; - -import org.apache.commons.lang.StringUtils; - -import java.util.*; - -public class GATKReportTableParser { - private int lineNum = 0; - private String[] descriptions; - private Map headers = new HashMap(); - private List values = new ArrayList(); - - public void parse(String line) { - lineNum++; - switch (lineNum) { - case 1: - descriptions = parseLine(line); - case 2: - String[] columnHeaders = parseLine(line); - for (int i = 0; i < columnHeaders.length; i++) - headers.put(columnHeaders[i], i); - default: - values.add(parseLine(line)); - } - } - - public String getTableName() { - return descriptions[1]; - } - - public String getValue(String[] key, String column) { - if (!headers.containsKey(column)) - return null; - for (String[] row: values) - if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1))) - return row[headers.get(column)]; - return null; - } - - public String getValue(String key, String column) { - return getValue(key.split("\\."), column); - } - - private String generateKey(String[] row, int i) { - return StringUtils.join(row, ".", 0, i); - } - - private String[] parseLine(String line) { - return line.split(" +"); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java new file mode 100644 index 000000000..5f1159a43 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.report; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +public enum GATKReportVersion { + /** + * Differences between other versions: + * - Does not allow spaces in cells. + * - Mostly fixed width but has a bug where the string width of floating point + * values was not measured correctly leading to columns that aren't aligned + */ + V0_1("v0.1"), + + /** + * Differences between other versions: + * - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6". + * - Fixed width fixed for floating point values + */ + V0_2("v0.2"); + + public final String versionString; + + private GATKReportVersion(String versionString) { + this.versionString = versionString; + } + + @Override + public String toString() { + return versionString; + } + + /** + * Returns the GATK Report Version from the file header. + * @param header Header from the file starting with ##:GATKReport.v[version] + * @return The version as an enum. + */ + public static GATKReportVersion fromHeader(String header) { + if (header.startsWith("##:GATKReport.v0.1 ")) + return GATKReportVersion.V0_1; + + if (header.startsWith("##:GATKReport.v0.2 ")) + return GATKReportVersion.V0_2; + + throw new ReviewedStingException("Unknown GATK report version in header: " + header); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java index 4a4f6f6af..4e3342609 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java @@ -235,7 +235,7 @@ public class DiffEngine { // now that we have a specific list of values we want to show, display them GATKReport report = new GATKReport(); final String tableName = "diffences"; - report.addTable(tableName, "Summarized differences between the master and test files.\nSee http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); + report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", false); GATKReportTable table = report.getTable(tableName); table.addPrimaryKey("Difference", true); table.addColumn("NumberOfOccurrences", 0); diff --git a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java index 1d4251542..3159f3fb7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/TextFormattingUtils.java @@ -116,4 +116,57 @@ public class TextFormattingUtils { return bundle; } + + /** + * Returns the word starting positions within line, excluding the first position 0. + * The returned list is compatible with splitFixedWidth. + * @param line Text to parse. + * @return the word starting positions within line, excluding the first position 0. + */ + public static List getWordStarts(String line) { + if (line == null) + throw new ReviewedStingException("line is null"); + List starts = new ArrayList(); + int stop = line.length(); + for (int i = 1; i < stop; i++) + if (Character.isWhitespace(line.charAt(i-1))) + if(!Character.isWhitespace(line.charAt(i))) + starts.add(i); + return starts; + } + + /** + * Parses a fixed width line of text. + * @param line Text to parse. + * @param columnStarts the column starting positions within line, excluding the first position 0. + * @return The parsed string array with each entry trimmed. + */ + public static String[] splitFixedWidth(String line, List columnStarts) { + if (line == null) + throw new ReviewedStingException("line is null"); + if (columnStarts == null) + throw new ReviewedStingException("columnStarts is null"); + int startCount = columnStarts.size(); + String[] row = new String[startCount + 1]; + if (startCount == 0) { + row[0] = line.trim(); + } else { + row[0] = line.substring(0, columnStarts.get(0)).trim(); + for (int i = 1; i < startCount; i++) + row[i] = line.substring(columnStarts.get(i - 1), columnStarts.get(i)).trim(); + row[startCount] = line.substring(columnStarts.get(startCount - 1)).trim(); + } + return row; + } + + /** + * Parses a line of text by whitespace. + * @param line Text to parse. + * @return The parsed string array. + */ + public static String[] splitWhiteSpace(String line) { + if (line == null) + throw new ReviewedStingException("line is null"); + return line.trim().split("\\s+"); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java new file mode 100644 index 000000000..02e1ba99a --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.report; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class GATKReportUnitTest extends BaseTest { + @Test + public void testParse() throws Exception { + String reportPath = validationDataLocation + "exampleGATKReport.eval"; + GATKReport report = new GATKReport(reportPath); + + GATKReportTable countVariants = report.getTable("CountVariants"); + Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); + Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872"); + + GATKReportTable validationReport = report.getTable("ValidationReport"); + Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); + Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known"); + Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN"); + + GATKReportTable simpleMetricsByAC = report.getTable("SimpleMetricsByAC.metrics"); + Assert.assertEquals(simpleMetricsByAC.getVersion(), GATKReportVersion.V0_1); + Object simpleMetricsByACPK = simpleMetricsByAC.getPrimaryKey("none.eval.none.novel.ac2"); + Assert.assertEquals(simpleMetricsByAC.get(simpleMetricsByACPK, "AC"), "2"); + + Assert.assertFalse(simpleMetricsByAC.containsPrimaryKey("none.eval.none.novel.ac2.bad")); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index 77159d9c2..f9aaaecc1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -30,8 +30,6 @@ import org.testng.annotations.Test; import java.io.File; import java.util.Arrays; -import java.util.Collections; -import java.util.List; public class DiffObjectsIntegrationTest extends WalkerTest { private class TestParams extends TestDataProvider { @@ -52,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "4d9f4636de05b93c354d05011264546e"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "37e6efd833b5cd6d860a9df3df9713fc"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "92311de76dda3f38aac289d807ef23d0"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "0c69412c385fda50210f2a612e1ffe4a"); return TestParams.getTests(TestParams.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 23c606ad0..3eeabdc5b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -4,8 +4,6 @@ import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; public class VariantEvalIntegrationTest extends WalkerTest { private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; @@ -45,7 +43,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") + Arrays.asList("bced1842c78fbabb089dd12b7087050d") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -66,7 +64,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") + Arrays.asList("06510bd37ffaa39e817ca0dcaf8f8ac2") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -88,7 +86,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") + Arrays.asList("19c5b1b6396921c5b1059a2849ae4fcc") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -109,7 +107,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("677fe398643e62a10d6739d36a720a12") + Arrays.asList("a71f8d81cf166cd97ac628092650964a") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -130,7 +128,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") + Arrays.asList("4dabe0658232f6174188515db6dfe112") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -151,7 +149,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") + Arrays.asList("3340587f10ceff83e5567ddfd1a9a60e") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -172,7 +170,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") + Arrays.asList("c730c7ee31c8138cef6efd8dd04fbbfc") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -195,7 +193,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") + Arrays.asList("2559ca8f454b03e81561f6947f79df18") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -220,7 +218,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") + Arrays.asList("23aa5f97641d2fd033095f21c51d2f37") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -239,7 +237,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("d44c8f44384189a09eea85a8e89d7299") + Arrays.asList("a69dd3f06903b3f374c6d6f010c653e0") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -249,7 +247,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000"; for (String tests : testsEnumerations) { WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("cdbe47ea01b9dd79ff1c5ce6f5fa8bec")); + 1, Arrays.asList("db95c8af8ba549d38ca6741a59fd6892")); executeTestParallel("testSelect1", spec); } } @@ -260,14 +258,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); + Arrays.asList("96f27163f16bb945f19c6623cd6db34e")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); executeTestParallel("testCompVsEvalAC",spec); } @@ -278,14 +276,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testTranches() { String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); executeTestParallel("testTranches",spec); } @Test public void testCompOverlap() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("462d4784dd55294ef9d5118217b157a5")); executeTestParallel("testCompOverlap",spec); } @@ -299,7 +297,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -D " + dbsnp + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("5b1fc9a4066aca61f1b5f7b933ad37d9")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("61c36fb6cc75172e2b22a44edeae85e0")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -313,7 +311,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6d902d9d4d8fef5219a43e416a51cee6")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("79089484097614b7ab81bbc3ad3a892a")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -330,13 +328,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("55a1c53bced20701c56accfc3eb782a7")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9f906c04a4553d649b51ae67e0a25113")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "454a1750fd36525f24172b21af5f49de"; + String md5 = "97a16a99a43d2384cfabc39d36647419"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( @@ -391,7 +389,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") + Arrays.asList("44464fe7c89a56cf128a932ef640f7da") ); executeTest("testAlleleCountStrat", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index d7efe4212..d396e5167 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { " -EV CompOverlap -noEV -noST" + " -o %s", 1, - Arrays.asList("f60729c900bc8368717653b3fad80d1e") //"f60729c900bc8368717653b3fad80d1e" + Arrays.asList("ea09bf764adba9765b99921c5ba2c709") ); executeTest("testVCFStreamingChain", selectTestSpec); diff --git a/public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java new file mode 100644 index 000000000..45a618f71 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/text/TextFormattingUtilsUnitTest.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.text; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; + +public class TextFormattingUtilsUnitTest extends BaseTest { + @Test(expectedExceptions = ReviewedStingException.class) + public void testSplitWhiteSpaceNullLine() { + TextFormattingUtils.splitWhiteSpace(null); + } + + @Test + public void testSplitWhiteSpace() { + Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz"), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz"), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitWhiteSpace(" foo bar baz "), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("foo bar baz "), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitWhiteSpace("\tfoo\tbar\tbaz\t"), new String[]{"foo", "bar", "baz"}); + } + + @Test(expectedExceptions = ReviewedStingException.class) + public void testGetWordStartsNullLine() { + TextFormattingUtils.getWordStarts(null); + } + + @Test + public void testGetWordStarts() { + Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(4, 8)); + Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz"), Arrays.asList(5, 10)); + Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz"), Arrays.asList(1, 5, 9)); + Assert.assertEquals(TextFormattingUtils.getWordStarts(" foo bar baz "), Arrays.asList(1, 5, 9)); + Assert.assertEquals(TextFormattingUtils.getWordStarts("foo bar baz "), Arrays.asList(4, 8)); + Assert.assertEquals(TextFormattingUtils.getWordStarts("\tfoo\tbar\tbaz\t"), Arrays.asList(1, 5, 9)); + } + + @Test(expectedExceptions = ReviewedStingException.class) + public void testSplitFixedWidthNullLine() { + TextFormattingUtils.splitFixedWidth(null, Collections.emptyList()); + } + + @Test(expectedExceptions = ReviewedStingException.class) + public void testSplitFixedWidthNullColumnStarts() { + TextFormattingUtils.splitFixedWidth("foo bar baz", null); + } + + @Test + public void testSplitFixedWidth() { + Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz", Arrays.asList(5, 10)), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" foo bar baz ", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth("foo bar baz ", Arrays.asList(4, 8)), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth("\tfoo\tbar\tbaz\t", Arrays.asList(5, 9)), new String[] { "foo", "bar", "baz" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth("f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f o", "b r", "b z" }); + Assert.assertEquals(TextFormattingUtils.splitFixedWidth(" f o b r b z", Arrays.asList(4, 8)), new String[] { "f", "o b", "r b z" }); + } +} diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index c2c956118..27ac559c5 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -34,8 +34,8 @@ import org.broadinstitute.sting.BaseTest import org.broadinstitute.sting.MD5DB import org.broadinstitute.sting.queue.QCommandLine import org.broadinstitute.sting.queue.util.{Logging, ProcessController} -import java.io.{FileNotFoundException, File} -import org.broadinstitute.sting.gatk.report.GATKReportParser +import java.io.File +import org.broadinstitute.sting.gatk.report.GATKReport import org.apache.commons.io.FileUtils import org.broadinstitute.sting.queue.engine.CommandLinePluginManager @@ -118,12 +118,11 @@ object PipelineTest extends BaseTest with Logging { // write the report to the shared validation data location val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss") val reportLocation = "%s%s/%s/validation.%s.eval".format(validationReportsDataLocation, jobRunner, name, formatter.format(new Date)) - val report = new File(reportLocation) + val reportFile = new File(reportLocation) - FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), report); + FileUtils.copyFile(new File(runDir(name, jobRunner) + evalSpec.evalReport), reportFile); - val parser = new GATKReportParser - parser.parse(report) + val report = new GATKReport(reportFile); var allInRange = true @@ -131,7 +130,9 @@ object PipelineTest extends BaseTest with Logging { println(name + " validation values:") println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { - val value = parser.getValue(validation.table, validation.key, validation.metric) + val table = report.getTable(validation.table) + val key = table.getPrimaryKey(validation.key) + val value = String.valueOf(table.get(key, validation.metric)) val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " " println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric)) From a587f3880814c64adad697e47d3640bb6e191d28 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Wed, 3 Aug 2011 02:21:01 -0400 Subject: [PATCH 087/186] Fixed example unified genotyper pipeline to wrap filter expressions with quotes and use rod binding name "variant" instead of "vcf". --- .../queue/qscripts/examples/ExampleUnifiedGenotyper.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala index 4a93233eb..1d473b210 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala @@ -59,10 +59,10 @@ class ExampleUnifiedGenotyper extends QScript { evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out) evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval") - variantFilter.rodBind :+= RodBind("vcf", "VCF", genotyper.out) + variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out) variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf") variantFilter.filterName = filterNames - variantFilter.filterExpression = filterExpressions + variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"") evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out) evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval") From d9bc673ff2f8964a2747115dbe8d4e16bf1bac80 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 09:42:43 -0400 Subject: [PATCH 088/186] Fixed bad constructor in RMDTUnitTest --- .../sting/gatk/refdata/RefMetaDataTrackerUnitTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index 85c5b3f73..afb6e418a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -225,7 +225,7 @@ public class RefMetaDataTrackerUnitTest { RefMetaDataTracker tracker = test.makeTracker(); for ( String nameAsString : Arrays.asList("A", "B") ) { - RodBinding binding = new RodBinding(Feature.class, nameAsString, "none", new Tags()); + RodBinding binding = new RodBinding(Feature.class, nameAsString, "none", "vcf", new Tags()); List v1 = tracker.getValues(binding); testGetter(nameAsString, v1, test.expected(nameAsString), true, tracker); @@ -246,8 +246,8 @@ public class RefMetaDataTrackerUnitTest { RefMetaDataTracker tracker = test.makeTracker(); String nameAsString = "A+B"; - RodBinding A = new RodBinding(Feature.class, "A", "none", new Tags()); - RodBinding B = new RodBinding(Feature.class, "B", "none", new Tags()); + RodBinding A = new RodBinding(Feature.class, "A", "none", "vcf", new Tags()); + RodBinding B = new RodBinding(Feature.class, "B", "none", "vcf", new Tags()); List> binding = Arrays.asList(A, B); List v1 = tracker.getValues(binding); From 7c89fe01b3f36804ec36d0f310a54cff3451dc75 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 11:00:36 -0400 Subject: [PATCH 089/186] Instead of having the padded reference base be some hackish attribute it is now an actual variable in the Variant Context class. More importantly, we now always require that it be present when padding is necessary - and validate as such upon construction of the VC. This cleans up the interface significantly because we no longer require that a reference base be passed in when writing a VC/VCF record. --- .../gatk/io/storage/VCFWriterStorage.java | 6 +- .../sting/gatk/io/stubs/VCFWriterStub.java | 4 +- .../gatk/refdata/VariantContextAdaptors.java | 15 ++- .../walkers/annotator/VariantAnnotator.java | 4 +- .../beagle/BeagleOutputToVCFWalker.java | 4 +- .../beagle/ProduceBeagleInputWalker.java | 6 +- .../VariantsToBeagleUnphasedWalker.java | 2 +- .../filters/VariantFiltrationWalker.java | 2 +- .../walkers/genotyper/UGCalcLikelihoods.java | 4 +- .../walkers/genotyper/UGCallVariants.java | 2 +- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 11 +- .../walkers/genotyper/VariantCallContext.java | 11 -- .../indels/SomaticIndelDetectorWalker.java | 8 +- .../phasing/MergeAndMatchHaplotypes.java | 2 +- ...eSegregatingAlternateAllelesVCFWriter.java | 20 ++-- .../walkers/phasing/PhaseByTransmission.java | 2 +- .../sting/gatk/walkers/phasing/WriteVCF.java | 12 +- .../ApplyRecalibration.java | 4 +- .../walkers/variantutils/CombineVariants.java | 6 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 19 ++-- .../variantutils/LiftoverVariants.java | 4 +- .../variantutils/RandomlySplitVariants.java | 4 +- .../walkers/variantutils/SelectVariants.java | 26 ++--- .../VariantValidationAssessor.java | 18 ++- .../walkers/variantutils/VariantsToTable.java | 8 +- .../walkers/variantutils/VariantsToVCF.java | 9 +- .../utils/codecs/vcf/AbstractVCFCodec.java | 3 +- .../codecs/vcf/SortingVCFWriterBase.java | 13 +-- .../utils/codecs/vcf/StandardVCFWriter.java | 12 +- .../sting/utils/codecs/vcf/VCFWriter.java | 2 +- .../variantcontext/MutableVariantContext.java | 6 +- .../utils/variantcontext/VariantContext.java | 107 ++++++++++-------- .../variantcontext/VariantContextUtils.java | 12 +- .../CombineVariantsIntegrationTest.java | 2 +- .../codecs/vcf/IndexFactoryUnitTest.java | 2 +- .../utils/genotype/vcf/VCFWriterUnitTest.java | 4 +- .../VariantContextIntegrationTest.java | 16 +-- 39 files changed, 180 insertions(+), 216 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java index 1da03e9c2..ebb4cbe66 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java @@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage, VCFWriter { writer.writeHeader(stub.getVCFHeader()); } - public void add(VariantContext vc, byte ref) { - writer.add(vc, ref); + public void add(VariantContext vc) { + writer.add(vc); } /** @@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage, VCFWriter { BasicFeatureSource source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false); for ( VariantContext vc : source.iterator() ) { - target.writer.add(vc, vc.getReferenceBaseForIndel()); + target.writer.add(vc); } source.close(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index bb84f9457..7a110fde5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -192,8 +192,8 @@ public class VCFWriterStub implements Stub, VCFWriter { /** * @{inheritDoc} */ - public void add(VariantContext vc, byte ref) { - outputTracker.getStorage(this).add(vc,ref); + public void add(VariantContext vc) { + outputTracker.getStorage(this).add(vc); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index ba9a10d8b..1c451575b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -127,14 +127,13 @@ public class VariantContextAdaptors { Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); - if ( sawNullAllele ) { - int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; - if ( index < 0 ) - return null; // we weren't given enough reference context to create the VariantContext - attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index])); - } - Collection genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext + Byte refBaseForIndel = new Byte(ref.getBases()[index]); + + Map genotypes = null; + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel); return vc; } else return null; // can't handle anything else diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index caaa371a6..d39912ed2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -225,12 +225,12 @@ public class VariantAnnotator extends RodWalker { if ( ! indelsOnly ) { for ( VariantContext annotatedVC : annotatedVCs ) - vcfWriter.add(annotatedVC, ref.getBase()); + vcfWriter.add(annotatedVC); } else { // check to see if the buffered context is different (in location) this context if ( indelBufferContext != null && ! VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(getToolkit().getGenomeLocParser(),annotatedVCs.iterator().next())) ) { for ( VariantContext annotatedVC : indelBufferContext ) - vcfWriter.add(annotatedVC, ref.getBase()); + vcfWriter.add(annotatedVC); indelBufferContext = annotatedVCs; } else { indelBufferContext = annotatedVCs; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 21c8ec430..d0bc59fbd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -127,7 +127,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; if (vc_input.isFiltered()) { - vcfWriter.add(vc_input, ref.getBase()); + vcfWriter.add(vc_input); return 1; } List r2rods = tracker.getReferenceMetaData(R2_ROD_NAME); @@ -333,7 +333,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { } - vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes), ref.getBase()); + vcfWriter.add(VariantContext.modifyAttributes(filteredVC,attributes)); return 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 3eed12992..2fc0d2368 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -171,20 +171,20 @@ public class ProduceBeagleInputWalker extends RodWalker { logger.debug(String.format("boot: %d, test: %d, total: %d", bootstrapSetSize, testSetSize, bootstrapSetSize+testSetSize+1)); if ( (bootstrapSetSize+1.0)/(1.0+bootstrapSetSize+testSetSize) <= bootstrap ) { if ( bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER), ref.getBase() ); + bootstrapVCFOutput.add(VariantContext.modifyFilters(validation, BOOTSTRAP_FILTER)); } bootstrapSetSize++; return true; } else { if ( bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(validation,ref.getBase()); + bootstrapVCFOutput.add(validation); } testSetSize++; return false; } } else { if ( validation != null && bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(validation,ref.getBase()); + bootstrapVCFOutput.add(validation); } return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index f6cd1d636..5d716bed4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -110,7 +110,7 @@ public class VariantsToBeagleUnphasedWalker extends RodWalker // if we are holding it back and we are writing a bootstrap VCF, write it out if ( makeMissing && bootstrapVCFOutput != null ) { - bootstrapVCFOutput.add(vc, ref.getBase()); + bootstrapVCFOutput.add(vc); } // regardless, all sites are written to the unphased genotypes file, marked as missing if appropriate diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 6c023573a..2507eabbb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -272,7 +272,7 @@ public class VariantFiltrationWalker extends RodWalker { else filteredVC = new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); - writer.add( filteredVC, context.getReferenceContext().getBase() ); + writer.add(filteredVC); } public Integer reduce(Integer value, Integer sum) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index 22c3081a3..e5e78905f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -93,7 +93,7 @@ public class UGCalcLikelihoods extends LocusWalker public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext); - return call == null ? null : new VariantCallContext(call, refContext.getBase(), true); + return call == null ? null : new VariantCallContext(call, true); } public Integer reduceInit() { return 0; } @@ -107,7 +107,7 @@ public class UGCalcLikelihoods extends LocusWalker return sum; try { - writer.add(value, value.refBase); + writer.add(value); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index a3b9f379e..fd29ff87e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -115,7 +115,7 @@ public class UGCallVariants extends RodWalker { try { Map attrs = new HashMap(value.getAttributes()); VariantContextUtils.calculateChromosomeCounts(value, attrs, true); - writer.add(VariantContext.modifyAttributes(value, attrs), value.refBase); + writer.add(VariantContext.modifyAttributes(value, attrs)); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index c673f7b3b..d379b05a1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -227,7 +227,7 @@ public class UnifiedGenotyper extends LocusWalker GLs) { @@ -300,7 +300,8 @@ public class UnifiedGenotyperEngine { genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, - null); + null, + refContext.getBase()); } // private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine @@ -425,7 +426,7 @@ public class UnifiedGenotyperEngine { myAlleles.add(vc.getReference()); } VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc, - myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes); + myAlleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes, refContext.getBase()); if ( annotationEngine != null ) { // first off, we want to use the *unfiltered* and *unBAQed* context for the annotations @@ -439,9 +440,7 @@ public class UnifiedGenotyperEngine { vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); } - VariantCallContext call = new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); - call.setRefBase(refContext.getBase()); - return call; + return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } private int calculateEndPos(Set alleles, Allele refAllele, GenomeLoc loc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java index 5896e784e..423c80112 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/VariantCallContext.java @@ -36,7 +36,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; * Useful helper class to communicate the results of calculateGenotype to framework */ public class VariantCallContext extends VariantContext { - public byte refBase; // Was the site called confidently, either reference or variant? public boolean confidentlyCalled = false; @@ -55,16 +54,6 @@ public class VariantCallContext extends VariantContext { this.shouldEmit = shouldEmit; } - VariantCallContext(VariantContext vc, byte ref, boolean confidentlyCalledP) { - super(vc); - this.refBase = ref; - this.confidentlyCalled = confidentlyCalledP; - } - - public void setRefBase(byte ref) { - this.refBase = ref; - } - /* these methods are only implemented for GENOTYPE_GIVEN_ALLELES MODE */ //todo -- expand these methods to all modes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 443e6e9f2..3e3ee7364 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -1033,8 +1033,8 @@ public class SomaticIndelDetectorWalker extends ReadWalker { filters.add("NoCall"); } VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes, - -1.0 /* log error */, filters, null); - vcf.add(vc,refBases[(int)start-1]); + -1.0 /* log error */, filters, null, refBases[(int)start-1]); + vcf.add(vc); } /** Fills l with appropriate alleles depending on whether call is insertion or deletion @@ -1130,8 +1130,8 @@ public class SomaticIndelDetectorWalker extends ReadWalker { } VariantContext vc = new VariantContext("IGv2_Indel_call", refName, start, stop, alleles, genotypes, - -1.0 /* log error */, filters, attrs); - vcf.add(vc,refBases[(int)start-1]); + -1.0 /* log error */, filters, attrs, refBases[(int)start-1]); + vcf.add(vc); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java index 298d8d6c8..83216d214 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypes.java @@ -91,7 +91,7 @@ public class MergeAndMatchHaplotypes extends RodWalker { } VariantContext newvc = new VariantContext(SOURCE_NAME, pbt.getChr(), pbt.getStart(), pbt.getStart(), pbt.getAlleles(), genotypes, pbt.getNegLog10PError(), pbt.getFilters(), pbt.getAttributes()); - vcfWriter.add(newvc, ref.getBase()); + vcfWriter.add(newvc); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java index b0491a281..53cfaa3a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesVCFWriter.java @@ -118,7 +118,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { innerWriter.close(); } - public void add(VariantContext vc, byte refBase) { + public void add(VariantContext vc) { if (useSingleSample != null) { // only want to output context for one sample Genotype sampGt = vc.getGenotype(useSingleSample); if (sampGt != null) // TODO: subContextFromGenotypes() does not handle any INFO fields [AB, HaplotypeScore, MQ, etc.]. Note that even SelectVariants.subsetRecord() only handles AC,AN,AF, and DP! @@ -138,11 +138,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { if (curVcIsNotFiltered) { // still need to wait before can release vc logger.debug("Waiting for new variant " + VariantContextUtils.getLocation(genomeLocParser, vc)); - vcfrWaitingToMerge = new VCFRecord(vc, refBase, false); + vcfrWaitingToMerge = new VCFRecord(vc, false); } else if (!emitOnlyMergedRecords) { // filtered records are never merged logger.debug("DIRECTLY output " + VariantContextUtils.getLocation(genomeLocParser, vc)); - innerWriter.add(vc, refBase); + innerWriter.add(vc); } } else { // waiting to merge vcfrWaitingToMerge @@ -151,7 +151,7 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { if (!curVcIsNotFiltered) { if (!emitOnlyMergedRecords) { // filtered records are never merged logger.debug("Caching unprocessed output " + VariantContextUtils.getLocation(genomeLocParser, vc)); - filteredVcfrList.add(new VCFRecord(vc, refBase, false)); + filteredVcfrList.add(new VCFRecord(vc, false)); } } else { // waiting to merge vcfrWaitingToMerge, and curVcIsNotFiltered. So, attempt to merge them: @@ -188,14 +188,14 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { addedAttribs.putAll(mergedVc.getAttributes()); mergedVc = VariantContext.modifyAttributes(mergedVc, addedAttribs); - vcfrWaitingToMerge = new VCFRecord(mergedVc, vcfrWaitingToMerge.refBase, true); + vcfrWaitingToMerge = new VCFRecord(mergedVc, true); numMergedRecords++; } } if (!mergedRecords) { stopWaitingToMerge(); - vcfrWaitingToMerge = new VCFRecord(vc, refBase, false); + vcfrWaitingToMerge = new VCFRecord(vc, false); } logger.debug("Merged? = " + mergedRecords); } @@ -210,11 +210,11 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { } if (!emitOnlyMergedRecords || vcfrWaitingToMerge.resultedFromMerge) - innerWriter.add(vcfrWaitingToMerge.vc, vcfrWaitingToMerge.refBase); + innerWriter.add(vcfrWaitingToMerge.vc); vcfrWaitingToMerge = null; for (VCFRecord vcfr : filteredVcfrList) - innerWriter.add(vcfr.vc, vcfr.refBase); + innerWriter.add(vcfr.vc); filteredVcfrList.clear(); } @@ -257,12 +257,10 @@ public class MergeSegregatingAlternateAllelesVCFWriter implements VCFWriter { private static class VCFRecord { public VariantContext vc; - public byte refBase; public boolean resultedFromMerge; - public VCFRecord(VariantContext vc, byte refBase, boolean resultedFromMerge) { + public VCFRecord(VariantContext vc, boolean resultedFromMerge) { this.vc = vc; - this.refBase = refBase; this.resultedFromMerge = resultedFromMerge; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index cf4afbb6d..992e4d9d3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -312,7 +312,7 @@ public class PhaseByTransmission extends RodWalker { VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap); - vcfWriter.add(newvc, ref.getBase()); + vcfWriter.add(newvc); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java index 2851ace0d..c10eaa2da 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/WriteVCF.java @@ -25,20 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; -import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; public class WriteVCF { public static void writeVCF(VariantContext vc, VCFWriter writer, Logger logger) { - byte refBase; - if (!vc.isIndel()) { - Allele refAllele = vc.getReference(); - refBase = SNPallelePair.getSingleBase(refAllele); - } - else { - refBase = vc.getReferenceBaseForIndel(); - } - - writer.add(vc, refBase); + writer.add(vc); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index b195fd35f..33504f96e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -204,9 +204,9 @@ public class ApplyRecalibration extends RodWalker { filters.add(filterString); vc = VariantContext.modifyFilters(vc, filters); } - vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() ); + vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs) ); } else { // valid VC but not compatible with this mode, so just emit the variant untouched - vcfWriter.add( vc, ref.getBase() ); + vcfWriter.add( vc ); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 9c2a520ef..57e2746f3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -158,7 +158,7 @@ public class CombineVariants extends RodWalker { if ( ASSUME_IDENTICAL_SAMPLES ) { for ( final VariantContext vc : vcs ) { - vcfWriter.add( vc, ref.getBase() ); + vcfWriter.add(vc); } return vcs.isEmpty() ? 0 : 1; @@ -183,7 +183,7 @@ public class CombineVariants extends RodWalker { if ( VCsByType.containsKey(type) ) mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, - ref.getBase(), SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); + SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } } @@ -198,7 +198,7 @@ public class CombineVariants extends RodWalker { VariantContext annotatedMergedVC = VariantContext.modifyAttributes(mergedVC, attributes); if ( minimalVCF ) annotatedMergedVC = VariantContextUtils.pruneVariantContext(annotatedMergedVC, Arrays.asList(SET_KEY)); - vcfWriter.add(annotatedMergedVC, ref.getBase()); + vcfWriter.add(annotatedMergedVC); } return vcs.isEmpty() ? 0 : 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index b45ee1b67..fc9947e20 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -78,7 +78,7 @@ public class FilterLiftedVariants extends RodWalker { if ( failed ) failedLocs++; else - writer.add(vc, ref[0]); + writer.add(vc); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 2ebd183f4..5ff3921de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -93,7 +93,7 @@ public class LeftAlignVariants extends RodWalker { if ( vc.isBiallelic() && vc.isIndel() ) return writeLeftAlignedIndel(vc, ref); else { - writer.add(vc, ref.getBase()); + writer.add(vc); return 0; } } @@ -109,7 +109,7 @@ public class LeftAlignVariants extends RodWalker { indelLength = vc.getAlternateAllele(0).length(); if ( indelLength > 200 ) { - writer.add(vc, ref.getBase()); + writer.add(vc); return 0; } @@ -137,17 +137,12 @@ public class LeftAlignVariants extends RodWalker { byte[] newBases = new byte[indelLength]; System.arraycopy((vc.isDeletion() ? refSeq : originalIndel), indelIndex, newBases, 0, indelLength); Allele newAllele = Allele.create(newBases, vc.isDeletion()); - newVC = updateAllele(newVC, newAllele); + newVC = updateAllele(newVC, newAllele, refSeq[indelIndex-1]); - // we need to update the reference base just in case it changed - Map attrs = new HashMap(newVC.getAttributes()); - attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refSeq[indelIndex-1]); - newVC = VariantContext.modifyAttributes(newVC, attrs); - - writer.add(newVC, refSeq[indelIndex-1]); + writer.add(newVC); return 1; } else { - writer.add(vc, ref.getBase()); + writer.add(vc); return 0; } } @@ -173,7 +168,7 @@ public class LeftAlignVariants extends RodWalker { return hap; } - public static VariantContext updateAllele(VariantContext vc, Allele newAllele) { + public static VariantContext updateAllele(VariantContext vc, Allele newAllele, Byte refBaseForIndel) { // create a mapping from original allele to new allele HashMap alleleMap = new HashMap(vc.getAlleles().size()); if ( newAllele.isReference() ) { @@ -197,6 +192,6 @@ public class LeftAlignVariants extends RodWalker { newGenotypes.put(genotype.getKey(), Genotype.modifyAlleles(genotype.getValue(), newAlleles)); } - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), alleleMap.values(), newGenotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), refBaseForIndel); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 4f05c8aac..b33f4d26a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -125,14 +125,14 @@ public class LiftoverVariants extends RodWalker { vc = VariantContext.modifyAttributes(vc, attrs); } - VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false); + VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, false); if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) { logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s", originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(), originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0))); } - writer.add(vc, ref.getBase()); + writer.add(vc); successfulIntervals++; } else { failedIntervals++; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index f0756d884..257bda372 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -101,9 +101,9 @@ public class RandomlySplitVariants extends RodWalker { for ( VariantContext vc : vcs ) { int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); if ( random < iFraction ) - vcfWriter1.add(vc, ref.getBase()); + vcfWriter1.add(vc); else - vcfWriter2.add(vc, ref.getBase()); + vcfWriter2.add(vc); } return 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index e1a3659b8..41374a349 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -25,38 +25,29 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; -import java.lang.annotation.AnnotationFormatError; import java.util.*; /** @@ -140,16 +131,13 @@ public class SelectVariants extends RodWalker { /* Private class used to store the intermediate variants in the integer random selection process */ private class RandomVariantStructure { private VariantContext vc; - private byte refBase; - RandomVariantStructure(VariantContext vcP, byte refBaseP) { + RandomVariantStructure(VariantContext vcP) { vc = vcP; - refBase = refBaseP; } - public void set (VariantContext vcP, byte refBaseP) { + public void set (VariantContext vcP) { vc = vcP; - refBase = refBaseP; } } @@ -374,7 +362,7 @@ public class SelectVariants extends RodWalker { randomlyAddVariant(++variantNumber, sub, ref.getBase()); } else if (!SELECT_RANDOM_FRACTION || (!KEEP_AF_SPECTRUM && GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) { - vcfWriter.add(sub, ref.getBase()); + vcfWriter.add(sub); } else { if (SELECT_RANDOM_FRACTION && KEEP_AF_SPECTRUM ) { @@ -422,7 +410,7 @@ public class SelectVariants extends RodWalker { //System.out.format("%s .. %4.4f\n",afo.toString(), af); if (GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom * afBoost * afBoost) - vcfWriter.add(sub, ref.getBase()); + vcfWriter.add(sub); } @@ -529,7 +517,7 @@ public class SelectVariants extends RodWalker { if (SELECT_RANDOM_NUMBER) { int positionToPrint = positionToAdd; for (int i=0; i { private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) { if (nVariantsAdded < numRandom) - variantArray[nVariantsAdded++] = new RandomVariantStructure(vc, refBase); + variantArray[nVariantsAdded++] = new RandomVariantStructure(vc); else { double v = GenomeAnalysisEngine.getRandomGenerator().nextDouble(); double t = (1.0/(rank-numRandom+1)); if ( v < t) { - variantArray[positionToAdd].set(vc, refBase); + variantArray[positionToAdd].set(vc); nVariantsAdded++; positionToAdd = nextCircularPosition(positionToAdd); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 86bb3b0e8..ca6533721 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -46,7 +46,7 @@ import java.util.*; */ @Reference(window=@Window(start=0,stop=40)) @Requires(value={},referenceMetaData=@RMD(name=VariantValidationAssessor.INPUT_VARIANT_ROD_BINDING_NAME, type=VariantContext.class)) -public class VariantValidationAssessor extends RodWalker,Integer> { +public class VariantValidationAssessor extends RodWalker { public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant"; @@ -68,7 +68,7 @@ public class VariantValidationAssessor extends RodWalker sampleNames = null; // variant context records - private ArrayList> records = new ArrayList>(); + private ArrayList records = new ArrayList(); // statistics private int numRecords = 0; @@ -89,7 +89,7 @@ public class VariantValidationAssessor extends RodWalker map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null ) return null; @@ -104,7 +104,7 @@ public class VariantValidationAssessor extends RodWalker call, Integer numVariants) { + public Integer reduce(VariantContext call, Integer numVariants) { if ( call != null ) { numVariants++; records.add(call); @@ -155,12 +155,12 @@ public class VariantValidationAssessor extends RodWalker record : records ) - vcfwriter.add(record.first, record.second); + for ( VariantContext record : records ) + vcfwriter.add(record); } - private Pair addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) { + private VariantContext addVariantInformationToCall(ReferenceContext ref, VariantContext vContext) { // check possible filters double hwPvalue = hardyWeinbergCalculation(vContext); @@ -202,9 +202,7 @@ public class VariantValidationAssessor extends RodWalker(vContext, ref.getBase()); + return VariantContext.modifyAttributes(vContext, infoMap); } private double hardyWeinbergCalculation(VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 39358dad5..b2b6d4815 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -78,8 +78,8 @@ public class VariantsToTable extends RodWalker { getters.put("REF", new Getter() { public String get(VariantContext vc) { String x = ""; - if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { - Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + if ( vc.hasReferenceBaseForIndel() ) { + Byte refByte = vc.getReferenceBaseForIndel(); x=x+new String(new byte[]{refByte}); } return x+vc.getReference().getDisplayString(); @@ -90,8 +90,8 @@ public class VariantsToTable extends RodWalker { StringBuilder x = new StringBuilder(); int n = vc.getAlternateAlleles().size(); if ( n == 0 ) return "."; - if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { - Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); + if ( vc.hasReferenceBaseForIndel() ) { + Byte refByte = vc.getReferenceBaseForIndel(); x.append(new String(new byte[]{refByte})); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index aa0e5987f..c9b63878d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -149,9 +149,10 @@ public class VariantsToVCF extends RodWalker { VariantContext vc = VariantContextAdaptors.toVariantContext(INPUT_ROD_NAME, hapmap, ref); if ( vc != null ) { if ( refBase != null ) { - Map attrs = new HashMap(vc.getAttributes()); - attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase); - vc = VariantContext.modifyAttributes(vc, attrs); + // TODO -- fix me + //Map attrs = new HashMap(vc.getAttributes()); + //attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase); + //vc = VariantContext.modifyAttributes(vc, attrs); } hapmapVCs.add(vc); } @@ -233,7 +234,7 @@ public class VariantsToVCF extends RodWalker { } vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); - vcfwriter.add(vc, ref); + vcfwriter.add(vc); } public Integer reduceInit() { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 710127f7a..9788f8654 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -567,7 +567,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, // set the reference base for indels in the attributes Map attributes = new TreeMap(inputVC.getAttributes()); - attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(inputVC.getReference().getBases()[0])); Map originalToTrimmedAlleleMap = new HashMap(); @@ -611,7 +610,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, genotypes.put(sample.getKey(), Genotype.modifyAlleles(sample.getValue(), trimmedAlleles)); } - return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes); + return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.filtersWereApplied() ? inputVC.getFilters() : null, attributes, new Byte(inputVC.getReference().getBases()[0])); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java index 311aaecf7..c299511db 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java @@ -105,9 +105,8 @@ public abstract class SortingVCFWriterBase implements VCFWriter { * add a record to the file * * @param vc the Variant Context object - * @param refBase the ref base */ - public void add(VariantContext vc, byte refBase) { + public void add(VariantContext vc) { /* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100) since there is no implicit ordering of chromosomes: */ @@ -122,7 +121,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter { noteCurrentRecord(vc); // possibly overwritten - queue.add(new VCFRecord(vc, refBase)); + queue.add(new VCFRecord(vc)); emitSafeRecords(); } @@ -133,7 +132,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter { // No need to wait, waiting for nothing, or before what we're waiting for: if (emitUnsafe || mostUpstreamWritableLoc == null || firstRec.vc.getStart() <= mostUpstreamWritableLoc) { queue.poll(); - innerWriter.add(firstRec.vc, firstRec.refBase); + innerWriter.add(firstRec.vc); } else { break; @@ -143,7 +142,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter { /** * Gets a string representation of this object. - * @return + * @return a string representation of this object */ @Override public String toString() { @@ -158,11 +157,9 @@ public abstract class SortingVCFWriterBase implements VCFWriter { private static class VCFRecord { public VariantContext vc; - public byte refBase; - public VCFRecord(VariantContext vc, byte refBase) { + public VCFRecord(VariantContext vc) { this.vc = vc; - this.refBase = refBase; } } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index b7f4be39a..d3705813c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -202,20 +202,18 @@ public class StandardVCFWriter implements VCFWriter { * add a record to the file * * @param vc the Variant Context object - * @param refBase the ref base used for indels */ - public void add(VariantContext vc, byte refBase) { - add(vc, refBase, false); + public void add(VariantContext vc) { + add(vc, false); } /** * add a record to the file * * @param vc the Variant Context object - * @param refBase the ref base used for indels * @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD) */ - public void add(VariantContext vc, byte refBase, boolean refBaseShouldBeAppliedToEndOfAlleles) { + public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) { if ( mHeader == null ) throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString()); @@ -223,7 +221,7 @@ public class StandardVCFWriter implements VCFWriter { vc = VariantContext.modifyGenotypes(vc, null); try { - vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBase, refBaseShouldBeAppliedToEndOfAlleles); + vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles); // if we are doing on the fly indexing, add the record ***before*** we write any bytes if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition()); @@ -285,7 +283,7 @@ public class StandardVCFWriter implements VCFWriter { Map infoFields = new TreeMap(); for ( Map.Entry field : vc.getAttributes().entrySet() ) { String key = field.getKey(); - if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) ) + if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) ) continue; String outputValue = formatVCFField(field.getValue()); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java index 0d23fe455..55749d26e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFWriter.java @@ -14,5 +14,5 @@ public interface VCFWriter { */ public void close(); - public void add(VariantContext vc, byte refBase); + public void add(VariantContext vc); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java index a191670a4..a752f4a1b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/MutableVariantContext.java @@ -27,15 +27,15 @@ public class MutableVariantContext extends VariantContext { } public MutableVariantContext(String source, String contig, long start, long stop, Collection alleles) { - this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); + super(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); } public MutableVariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes) { - this(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); + super(source, contig, start, stop, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); } public MutableVariantContext(VariantContext parent) { - this(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes()); + super(parent.getSource(), parent.contig, parent.start, parent.stop, parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.getReferenceBaseForIndel()); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index eab392c4d..3ea1bb5d6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -5,6 +5,7 @@ import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; @@ -163,11 +164,12 @@ import java.util.*; public class VariantContext implements Feature { // to enable tribble intergration protected InferredGeneticContext commonInfo = null; public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR; - public final static String REFERENCE_BASE_FOR_INDEL_KEY = "_REFERENCE_BASE_FOR_INDEL_"; public final static String UNPARSED_GENOTYPE_MAP_KEY = "_UNPARSED_GENOTYPE_MAP_"; public final static String UNPARSED_GENOTYPE_PARSER_KEY = "_UNPARSED_GENOTYPE_PARSER_"; public final static String ID_KEY = "ID"; + private final Byte REFERENCE_BASE_FOR_INDEL; + public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); /** The location of this VariantContext */ @@ -205,6 +207,24 @@ public class VariantContext implements Feature { // to enable tribble intergrati // --------------------------------------------------------------------------------------------------------- + /** + * the complete constructor. Makes a complete VariantContext from its arguments + * + * @param source source + * @param contig the contig + * @param start the start base (one based) + * @param stop the stop reference base (one based) + * @param alleles alleles + * @param genotypes genotypes map + * @param negLog10PError qual + * @param filters filters: use null for unfiltered and empty set for passes filters + * @param attributes attributes + * @param referenceBaseForIndel padded reference base + */ + public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, Byte referenceBaseForIndel) { + this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, referenceBaseForIndel, false); + } + /** * the complete constructor. Makes a complete VariantContext from its arguments * @@ -219,7 +239,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, false); + this(source, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes, null, false); } /** @@ -239,7 +259,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, double negLog10PError, Set filters, Map attributes) { - this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, true); + this(source, contig, start, stop, alleles, NO_GENOTYPES, negLog10PError, filters, attributes, null, true); } /** @@ -256,7 +276,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param attributes attributes */ public VariantContext(String source, String contig, long start, long stop, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, false); + this(source, contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes, null, false); } /** @@ -269,7 +289,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param alleles alleles */ public VariantContext(String source, String contig, long start, long stop, Collection alleles) { - this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, false); + this(source, contig, start, stop, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, null, false); } /** @@ -292,7 +312,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param other the VariantContext to copy */ public VariantContext(VariantContext other) { - this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), false); + this(other.getSource(), other.getChr(), other.getStart(), other.getEnd() , other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.filtersWereApplied() ? other.getFilters() : null, other.getAttributes(), other.REFERENCE_BASE_FOR_INDEL, false); } /** @@ -307,8 +327,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param negLog10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes + * @param referenceBaseForIndel padded reference base + * @param genotypesAreUnparsed true if the genotypes have not yet been parsed */ - private VariantContext(String source, String contig, long start, long stop, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes, boolean genotypesAreUnparsed) { + private VariantContext(String source, String contig, long start, long stop, + Collection alleles, Map genotypes, + double negLog10PError, Set filters, Map attributes, + Byte referenceBaseForIndel, boolean genotypesAreUnparsed) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } this.contig = contig; this.start = start; @@ -323,6 +348,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati this.commonInfo = new InferredGeneticContext(source, negLog10PError, filters, attributes); filtersWereAppliedToContext = filters != null; + REFERENCE_BASE_FOR_INDEL = referenceBaseForIndel; if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); } @@ -355,23 +381,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati // --------------------------------------------------------------------------------------------------------- public static VariantContext modifyGenotypes(VariantContext vc, Map genotypes) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), false); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), false); } public static VariantContext modifyLocation(VariantContext vc, String chr, int start, int end) { - return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), true); + return new VariantContext(vc.getSource(), chr, start, end, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); } public static VariantContext modifyFilters(VariantContext vc, Set filters) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), true); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, new HashMap(vc.getAttributes()), vc.getReferenceBaseForIndel(), true); } public static VariantContext modifyAttributes(VariantContext vc, Map attributes) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, true); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true); } public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set filters, Map attributes) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, true); + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true); } // --------------------------------------------------------------------------------------------------------- @@ -603,6 +629,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati return (String)commonInfo.getAttribute(ID_KEY); } + public boolean hasReferenceBaseForIndel() { + return REFERENCE_BASE_FOR_INDEL != null; + } + + // the indel base that gets stripped off for indels + public Byte getReferenceBaseForIndel() { + return REFERENCE_BASE_FOR_INDEL; + } + // --------------------------------------------------------------------------------------------------------- // // get routines to access context info fields @@ -1151,6 +1186,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati private boolean validate(boolean throwException) { try { + validateReferencePadding(); validateAlleles(); validateGenotypes(); } catch ( IllegalArgumentException e ) { @@ -1163,6 +1199,13 @@ public class VariantContext implements Feature { // to enable tribble intergrati return true; } + private void validateReferencePadding() { + boolean needsPadding = hasSymbolicAlleles() || (getReference().length() == getEnd() - getStart()); // off by one because padded base was removed + + if ( needsPadding && !hasReferenceBaseForIndel() ) + throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + getStart() + "; no padded reference base was provided."); + } + private void validateAlleles() { // check alleles boolean alreadySeenRef = false, alreadySeenNull = false; @@ -1221,16 +1264,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati // // --------------------------------------------------------------------------------------------------------- - // the indel base that gets stripped off for indels - public boolean hasReferenceBaseForIndel() { - return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY); - } - - // the indel base that gets stripped off for indels - public byte getReferenceBaseForIndel() { - return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N'; - } - private void determineType() { if ( type == null ) { switch ( getNAlleles() ) { @@ -1357,8 +1390,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati return false; } - public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) { - Allele refAllele = inputVC.getReference(); + public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, boolean refBaseShouldBeAppliedToEndOfAlleles) { // see if we need to pad common reference base from all alleles boolean padVC; @@ -1368,31 +1400,20 @@ public class VariantContext implements Feature { // to enable tribble intergrati long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1; if (inputVC.hasSymbolicAlleles()) padVC = true; - else if (refAllele.length() == locLength) + else if (inputVC.getReference().length() == locLength) padVC = false; - else if (refAllele.length() == locLength-1) + else if (inputVC.getReference().length() == locLength-1) padVC = true; else throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) + " in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size"); - // nothing to do if we don't need to pad bases if (padVC) { - Byte refByte; - Map attributes = inputVC.getAttributes(); + if ( !inputVC.hasReferenceBaseForIndel() ) + throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available."); - // upper-case for consistency; note that we can safely make these casts because the input is constrained to be a byte - inputRefBase = (byte)Character.toUpperCase((char)inputRefBase); - if (attributes.containsKey(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) - refByte = (Byte)attributes.get(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY); - else if (inputRefBase == 'A' || inputRefBase == 'T' || inputRefBase == 'C' || inputRefBase == 'G' || inputRefBase == 'N') - refByte = inputRefBase; - else - throw new IllegalArgumentException("Error when trying to pad Variant Context at location " + String.valueOf(inputVC.getStart()) - + " in contig " + inputVC.getChr() + - ". Either input reference base ("+(char)inputRefBase+ - ", ascii code="+inputRefBase+") must be a regular base, or input VC must contain reference base key"); + Byte refByte = inputVC.getReferenceBaseForIndel(); List alleles = new ArrayList(); Map genotypes = new TreeMap(); @@ -1444,11 +1465,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati // Do not change the filter state if filters were not applied to this context Set inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null; - return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), - inputVCFilters, attributes); - - - + return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes()); } else return inputVC; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 212600360..7d10749ee 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -295,10 +295,7 @@ public class VariantContextUtils { @Requires("vc != null") @Ensures("result != null") public static VariantContext sitesOnlyVariantContext(VariantContext vc) { - return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), - vc.getAlleles(), vc.getNegLog10PError(), - vc.filtersWereApplied() ? vc.getFilters() : null, - vc.getAttributes()); + return VariantContext.modifyGenotypes(vc, null); } /** @@ -449,7 +446,7 @@ public class VariantContextUtils { FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, boolean annotateOrigin, boolean printMessages, byte inputRefBase ) { - return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, inputRefBase, "set", false, false); + return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false); } /** @@ -464,7 +461,6 @@ public class VariantContextUtils { * @param genotypeMergeOptions merge option for genotypes * @param annotateOrigin should we annotate the set it came from? * @param printMessages should we print messages? - * @param inputRefBase the ref base * @param setKey the key name of the set * @param filteredAreUncalled are filtered records uncalled? * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? @@ -472,7 +468,7 @@ public class VariantContextUtils { */ public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection unsortedVCs, List priorityListOfVCs, FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions, - boolean annotateOrigin, boolean printMessages, byte inputRefBase, String setKey, + boolean annotateOrigin, boolean printMessages, String setKey, boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) return null; @@ -490,7 +486,7 @@ public class VariantContextUtils { for (VariantContext vc : prepaddedVCs) { // also a reasonable place to remove filtered calls, if needed if ( ! filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc,inputRefBase,false)); + VCs.add(VariantContext.createVariantContextWithPaddedAlleles(vc, false)); } if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled return null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 904a5b29b..9b152bc71 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -120,6 +120,6 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); } @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f72a178137e25dbe0b931934cdc0079d"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); } @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java index 68a2ecf8d..d08cda949 100755 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/IndexFactoryUnitTest.java @@ -70,7 +70,7 @@ public class IndexFactoryUnitTest { CloseableTribbleIterator it = source.iterator(); while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) { VariantContext vc = it.next(); - writer.add(vc, vc.getReferenceBaseForIndel()); + writer.add(vc); } writer.close(); diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index 34a2e616a..57f72d931 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -57,8 +57,8 @@ public class VCFWriterUnitTest extends BaseTest { VCFHeader header = createFakeHeader(metaData,additionalColumns); VCFWriter writer = new StandardVCFWriter(fakeVCFFile); writer.writeHeader(header); - writer.add(createVC(header),"A".getBytes()[0]); - writer.add(createVC(header),"A".getBytes()[0]); + writer.add(createVC(header)); + writer.add(createVC(header)); writer.close(); VCFCodec reader = new VCFCodec(); AsciiLineReader lineReader; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 6ed00f0ea..bde4c4ae3 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -19,14 +19,14 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "e9d96677a57bc3a10fb6d9ba942c19f0"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "8a1174d2b18b98e624abbe93e6af8fdd"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "3933f1fae5453c54c3f791a23de07599"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "c9cf2f01bf045a58dcc7649fd6ea2396"); + expectations.put("-L 1:1-10000 --printPerLocus", "c44a48dd9062a435a3579145ce8d1684"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "fa5762fa7dcb2652ed34bcdce9ecf455"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "dfdc554c52707541d335c3fb849feaba"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "db8ba72b557ebd698215281e5656b59c"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "2097e32988d603d3b353b50218c86d3b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "a103d856e8bc558c949c6e3f184e8913"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "5f2265ac6c6d80d64dc6e69a05c1250b"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "06a3ae4c0afa23b429a9491ab7707f3c"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "7f5eadb2098aafdef8bb45aac3722d03"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "a31b76fb8ed727616d8fb823c62bf677"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "f9d30920c8834ec7c7892507a5052fb7"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); } @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { // this really just tests that we are seeing the same number of objects over all of chr1 WalkerTestSpec spec = new WalkerTestSpec( root + " -L 1" + " -o %s", 1, // just one output file - Arrays.asList("045a5b02c86aeb9301dc0b724da0c8f7")); + Arrays.asList("137258e1dc490bfa83a2294c52e97ba9")); executeTest("testLargeScaleConversion", spec); } } From 85c67e9891f74b6906980cd7cf4ca31ccd8bb1cc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 11:16:06 -0400 Subject: [PATCH 090/186] Contracts and documentation for Rodbinding --- .../sting/commandline/RodBinding.java | 100 +++++++++++++++--- 1 file changed, 86 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index ab6b0ea06..af5031716 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -24,6 +24,8 @@ package org.broadinstitute.sting.commandline; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import org.broad.tribble.Feature; import java.util.*; @@ -31,29 +33,69 @@ import java.util.*; /** * A RodBinding representing a walker argument that gets bound to a ROD track. * - * There is no constraint on the type of the ROD bound. + * The RodBinding is a formal GATK argument that bridges between a walker and + * the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding + * is explicitly typed with type of the Tribble.Feature expected to be produced by this + * argument. The GATK Engine takes care of initializing the binding and connecting it + * to the RMD system. + * + * It is recommended that optional RodBindings be initialized to the value returned + * by the static method makeUnbound(). + * + * Note that this class is immutable. */ -public class RodBinding { +public final class RodBinding { protected final static String UNBOUND_VARIABLE_NAME = ""; protected final static String UNBOUND_SOURCE = "UNBOUND"; - protected final static String UNBOUND_TRIBBLE_TYPE = null; + protected final static String UNBOUND_TRIBBLE_TYPE = ""; + + /** + * Create an unbound Rodbinding of type. This is the correct programming + * style for an optional RodBinding + * + * At Input() + * RodBinding x = RodBinding.makeUnbound(T.class) + * + * The unbound binding is guaranteed to never match any binding. It uniquely + * returns false to isBound(). + * + * @param type the Class type produced by this unbound object + * @param any class extending Tribble Feature + * @return the UNBOUND RodBinding producing objects of type T + */ + @Requires("type != null") public final static RodBinding makeUnbound(Class type) { return new RodBinding(type); } + /** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */ final private String name; + /** where the data for this ROD is coming from. A file or special value if coming from stdin */ final private String source; + /** the string name of the tribble type, such as vcf, bed, etc. */ final private String tribbleType; + /** The command line tags associated with this RodBinding */ final private Tags tags; + /** The Java class expected for this RodBinding. Must correspond to the type emited by Tribble */ final private Class type; + /** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */ final private boolean bound; + /** + * The name counter. This is how we create unique names for collections of RodBindings + * on the command line. If you have provide the GATK with -X file1 and -X file2 to a + * RodBinding argument as List> then each binding will receive automatically + * the name of X and X2. + */ final private static Map nameCounter = new HashMap(); + /** for UnitTests */ final protected static void resetNameCounter() { nameCounter.clear(); } + @Requires("rawName != null") + @Ensures("result != null") final private static synchronized String countedVariableName(final String rawName) { Integer count = nameCounter.get(rawName); if ( count == null ) { @@ -65,10 +107,7 @@ public class RodBinding { } } - public boolean isBound() { - return bound; - } - + @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"}) public RodBinding(Class type, final String rawName, final String source, final String tribbleType, final Tags tags) { this.type = type; this.name = countedVariableName(rawName); @@ -79,9 +118,10 @@ public class RodBinding { } /** - * Make an unbound RodBinding - * @param type + * Make an unbound RodBinding. Only available for creating the globally unique UNBOUND object + * @param type class this unbound RodBinding creates */ + @Requires({"type != null"}) private RodBinding(Class type) { this.type = type; this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker @@ -91,24 +131,56 @@ public class RodBinding { this.bound = false; } - public String getName() { + + /** + * @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments + */ + final public boolean isBound() { + return bound; + } + + /** + * @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline + */ + @Ensures({"result != null"}) + final public String getName() { return name; } - public Class getType() { + + /** + * @return the string name of the tribble type, such as vcf, bed, etc. + */ + @Ensures({"result != null"}) + final public Class getType() { return type; } - public String getSource() { + + /** + * @return where the data for this ROD is coming from. A file or special value if coming from stdin + */ + @Ensures({"result != null"}) + final public String getSource() { return source; } - public Tags getTags() { + /** + * @return The command line tags associated with this RodBinding. Will include the tags used to + * determine the name and type of this RodBinding + */ + @Ensures({"result != null"}) + final public Tags getTags() { return tags; } - public String getTribbleType() { + /** + * @return The Java class expected for this RodBinding. Must correspond to the type emited by Tribble + */ + @Ensures({"result != null"}) + final public String getTribbleType() { return tribbleType; } + @Override public String toString() { return String.format("(RodBinding name=%s source=%s)", getName(), getSource()); } From f6648e01446e699735e1d502c2d03a1b44f3771c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 12:03:50 -0400 Subject: [PATCH 091/186] Don't left-align complex indels because it's too complicated. --- .../sting/gatk/walkers/variantutils/LeftAlignVariants.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 5ff3921de..5ab326418 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -90,7 +90,7 @@ public class LeftAlignVariants extends RodWalker { private int alignAndWrite(VariantContext vc, final ReferenceContext ref) { - if ( vc.isBiallelic() && vc.isIndel() ) + if ( vc.isBiallelic() && vc.isIndel() && !vc.isComplexIndel() ) return writeLeftAlignedIndel(vc, ref); else { writer.add(vc); From 020b2408a894b7ef56db9dcba49259bd2a3e7677 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 12:19:44 -0400 Subject: [PATCH 092/186] Adding integration test for left alignment of indels --- .../LeftAlignVariantsIntegrationTest.java | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java new file mode 100644 index 000000000..da6277242 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Tests LeftAlignVariants + */ +public class LeftAlignVariantsIntegrationTest extends WalkerTest { + + @Test + public void testLeftAlignment() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T LeftAlignVariants -o %s -R " + b37KGReference + " -B:variant,vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", + 1, + Arrays.asList("158b1d71b28c52e2789f164500b53732")); + executeTest("test left alignment", spec); + } +} From db2e0aaa1a533eaffd979892962b8d17e2a4a99c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 12:31:08 -0400 Subject: [PATCH 093/186] Darn, forgot to update unit tests. --- .../utils/genotype/vcf/VCFWriterUnitTest.java | 2 +- .../VariantContextUnitTest.java | 26 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index 57f72d931..e3a926fb9 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -135,7 +135,7 @@ public class VCFWriterUnitTest extends BaseTest { genotypes.put(name,gt); } - return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes); + return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes, (byte)'A'); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index e82817714..d8fa0eae4 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -92,45 +92,45 @@ public class VariantContextUnitTest { // test INDELs alleles = Arrays.asList(Aref, ATC); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); alleles = Arrays.asList(ATCref, A); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); alleles = Arrays.asList(Tref, TA, TC); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); alleles = Arrays.asList(ATCref, A, AC); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); alleles = Arrays.asList(ATCref, A, Allele.create("ATCTC")); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+2, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); // test MIXED alleles = Arrays.asList(TAref, T, TC); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED); alleles = Arrays.asList(TAref, T, AC); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED); alleles = Arrays.asList(ACref, ATC, AT); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop+1, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED); alleles = Arrays.asList(Aref, T, symbolic); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED); // test SYMBOLIC alleles = Arrays.asList(Tref, symbolic); - vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles); + vc = new VariantContext("test", snpLoc,snpLocStart, snpLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC); } @@ -191,7 +191,7 @@ public class VariantContextUnitTest { @Test public void testCreatingDeletionVariantContext() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles); + VariantContext vc = new VariantContext("test", delLoc, delLocStart, delLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getChr(), delLoc); Assert.assertEquals(vc.getStart(), delLocStart); @@ -218,7 +218,7 @@ public class VariantContextUnitTest { @Test public void testCreatingInsertionVariantContext() { List alleles = Arrays.asList(delRef, ATC); - VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles); + VariantContext vc = new VariantContext("test", insLoc, insLocStart, insLocStop, alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); Assert.assertEquals(vc.getChr(), insLoc); Assert.assertEquals(vc.getStart(), insLocStart); @@ -251,7 +251,7 @@ public class VariantContextUnitTest { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del)); } - @Test (expectedExceptions = IllegalArgumentException.class) + @Test (expectedExceptions = IllegalStateException.class) public void testBadConstructorArgs3() { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(del)); } From 3de10b1ef8864926466496c82828f3062eaa1664 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 12:37:50 -0400 Subject: [PATCH 094/186] Fixing misprint from Ryan's commit --- .../recalibration/RecalibrationWalkersPerformanceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index 08b9e0431..f89b80ead 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " D:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); From b25140db83b2acbb2dc2e0520244b64a7f207e44 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 13:34:20 -0400 Subject: [PATCH 095/186] Contracts and documentation for some of RefMetaDataTracker Continuing to fix integration tests that don't pass / run --- build.xml | 10 +- .../sting/commandline/RodBinding.java | 2 +- .../gatk/refdata/RefMetaDataTracker.java | 243 +++++++++++------- .../walkers/qc/ValidatingPileupWalker.java | 21 +- .../validation/ValidationAmplicons.java | 22 +- .../ReferenceOrderedViewUnitTest.java | 6 +- .../refdata/RefMetaDataTrackerUnitTest.java | 5 + .../VariantAnnotatorIntegrationTest.java | 28 +- .../VariantFiltrationIntegrationTest.java | 2 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 2 +- 10 files changed, 213 insertions(+), 128 deletions(-) diff --git a/build.xml b/build.xml index 1e4badc2c..9af8949ba 100644 --- a/build.xml +++ b/build.xml @@ -973,11 +973,11 @@ - - - - + + + + + diff --git a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java index af5031716..41b5bf6f3 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/RodBinding.java @@ -90,7 +90,7 @@ public final class RodBinding { final private static Map nameCounter = new HashMap(); /** for UnitTests */ - final protected static void resetNameCounter() { + final public static void resetNameCounter() { nameCounter.clear(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index e1b9fedf6..73685d758 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.RodBinding; @@ -18,10 +20,12 @@ import java.util.*; * The standard interaction model is: * * Traversal system arrives at a site, which has a bunch of RMDs covering it - Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs - * Traversal passes tracker to the walker - * walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was - * bound at this site. + * Traversal passes creates a tracker and passes it to the walker + * walker calls get(rodBinding) to obtain the RMDs values at this site for the track + * associated with rodBinding. + * + * Note that this is an immutable class. Once created the underlying data structures + * cannot be modified * * User: mdepristo * Date: Apr 3, 2009 @@ -45,14 +49,19 @@ public class RefMetaDataTracker { public RefMetaDataTracker(final Collection allBindings, final ReferenceContext ref) { this.ref = ref; + + // set up the map if ( allBindings.isEmpty() ) map = Collections.emptyMap(); else { - map = new HashMap(allBindings.size()); + Map tmap = new HashMap(allBindings.size()); for ( RODRecordList rod : allBindings ) { if ( rod != null ) - map.put(canonicalName(rod.getName()), rod); + tmap.put(canonicalName(rod.getName()), rod); } + + // ensure that no one modifies the map itself + map = Collections.unmodifiableMap(tmap); } } @@ -64,76 +73,120 @@ public class RefMetaDataTracker { // // ------------------------------------------------------------------------------------------ + /** + * Gets all of the Tribble features spanning this locus, returning them as a list of specific + * type T extending Feature. This function looks across all tracks to find the Features, so + * if you have two tracks A and B each containing 1 Feature, then getValues will return + * a list containing both features. + * + * Note that this function assumes that all of the bound features are instances of or + * subclasses of T. A ClassCastException will occur if this isn't the case. If you want + * to get all Features without any danger of such an exception use the root Tribble + * interface Feature. + * + * @param type The type of the underlying objects bound here + * @param as above + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ + @Requires({"type != null"}) + @Ensures("result != null") public List getValues(final Class type) { return addValues(map.keySet(), type, new ArrayList(), null, false, false); } + + /** + * Provides the same functionality as @link #getValues(Class) but will only include + * Features that start as the GenomeLoc provide onlyAtThisLoc. + * + * @param type The type of the underlying objects bound here + * @param onlyAtThisLoc + * @param as above + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ + @Requires({"type != null", "onlyAtThisLoc != null"}) + @Ensures("result != null") public List getValues(final Class type, final GenomeLoc onlyAtThisLoc) { return addValues(map.keySet(), type, new ArrayList(), onlyAtThisLoc, true, false); } - public List getValues(final Class type, final String name) { - return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false); - } - public List getValues(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { - return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false); - } - public List getValues(final Class type, final Collection names) { - return addValues(names, type, new ArrayList(), null, false, false); - } - public List getValues(final Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { - return addValues(names, type, new ArrayList(), onlyAtThisLoc, true, false); - } + /** + * Uses the same logic as @link #getValues(Class) but arbitrary select one of the resulting + * elements of the list to return. That is, if there would be two elements in the result of + * @link #getValues(Class), one of these two is selected, and which one it will be isn't + * specified. Consequently, this method is only really safe if (1) you absolutely know + * that only one binding will meet the constraints of @link #getValues(Class) or (2) + * you truly don't care which of the multiple bindings available you are going to examine. + * + * If there are no bindings here, getFirstValue() return null + * + * @param type The type of the underlying objects bound here + * @param as above + * @return A random single element the RODs bound here, or null if none are bound. + */ + @Requires({"type != null"}) public T getFirstValue(final Class type) { return safeGetFirst(getValues(type)); } + + /** + * Uses the same logic as @link #getValue(Class,GenomeLoc) to determine the list + * of eligible Features and @link #getFirstValue(Class) to select a single + * element from the interval list. + * + * @param type The type of the underlying objects bound here + * @param as above + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound. + */ + @Requires({"type != null", "onlyAtThisLoc != null"}) public T getFirstValue(final Class type, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, onlyAtThisLoc)); } - public T getFirstValue(final Class type, final String name) { - return safeGetFirst(getValues(type, name)); - } - public T getFirstValue(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { - return safeGetFirst(getValues(type, name, onlyAtThisLoc)); - } - public T getFirstValue(final Class type, final Collection names) { - return safeGetFirst(getValues(type, names)); - } - public T getFirstValue(final Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { - return safeGetFirst(getValues(type, names, onlyAtThisLoc)); - } // // ROD binding accessors // + @Requires({"rodBinding != null"}) + @Ensures("result != null") public List getValues(final RodBinding rodBinding) { - return getValues(rodBinding.getType(), rodBinding.getName()); + return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), null, false, false); } + @Requires({"rodBindings != null"}) + @Ensures("result != null") public List getValues(final Collection> rodBindings) { - List results = new ArrayList(); + List results = new ArrayList(1); for ( RodBinding rodBinding : rodBindings ) results.addAll(getValues(rodBinding)); return results; } + @Requires({"rodBinding != null", "onlyAtThisLoc != null"}) + @Ensures("result != null") public List getValues(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { - return getValues(rodBinding.getType(), rodBinding.getName(), onlyAtThisLoc); + return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false); } + @Requires({"rodBindings != null", "onlyAtThisLoc != null"}) + @Ensures("result != null") public List getValues(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { - List results = new ArrayList(); + List results = new ArrayList(1); for ( RodBinding rodBinding : rodBindings ) results.addAll(getValues(rodBinding, onlyAtThisLoc)); return results; } + @Requires({"rodBinding != null"}) public T getFirstValue(final RodBinding rodBinding) { - return getFirstValue(rodBinding.getType(), rodBinding.getName()); - } - public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { - return getFirstValue(rodBinding.getType(), rodBinding.getName(), onlyAtThisLoc); + return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true)); } + @Requires({"rodBinding != null", "onlyAtThisLoc != null"}) + public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true)); + } + + @Requires({"rodBindings != null"}) public T getFirstValue(final Collection> rodBindings) { for ( RodBinding rodBinding : rodBindings ) { T val = getFirstValue(rodBinding); @@ -143,6 +196,7 @@ public class RefMetaDataTracker { return null; } + @Requires({"rodBindings != null", "onlyAtThisLoc != null"}) public T getFirstValue(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { for ( RodBinding rodBinding : rodBindings ) { T val = getFirstValue(rodBinding, onlyAtThisLoc); @@ -152,42 +206,58 @@ public class RefMetaDataTracker { return null; } - - + /** + * Is there a binding at this site to a ROD/track with the specified name? + * + * @param rodBinding the rod binding we want to know about + * @return true if any Features are bound in this tracker to rodBinding + */ + @Requires({"rodBinding != null"}) public boolean hasValues(final RodBinding rodBinding) { - return hasValues(rodBinding.getName()); - } - - public List getValuesAsGATKFeatures(final RodBinding rodBinding) { - return getValuesAsGATKFeatures(rodBinding.getName()); + return map.containsKey(canonicalName(rodBinding.getName())); } /** * Helper function for getFirst() operations that takes a list of and * returns the first element, or null if no such element exists. * - * TODO: determine specific behavior for l.size() > 1. Do we turn first or an error? - * TODO: right now we return the first. Should be clearer - * * @param l * @param * @return */ + @Requires({"l != null"}) final private T safeGetFirst(final List l) { - // todo: should we be warning people here? Throwing an error? return l.isEmpty() ? null : l.get(0); } - /** - * Is there a binding at this site to a ROD/track with the specified name? - * - * @param name the name of the rod - * @return true if it has the rod - */ + // + // Deprecated accessors -- will be removed + // + @Deprecated public boolean hasValues(final String name) { return map.containsKey(canonicalName(name)); } + @Deprecated + public List getValues(final Class type, final String name) { + return addValues(name, type, new ArrayList(), getTrackDataByName(name), null, false, false); + } + @Deprecated + public List getValues(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { + return addValues(name, type, new ArrayList(), getTrackDataByName(name), onlyAtThisLoc, true, false); + } + @Deprecated + public List getValues(final Class type, final Collection names, final GenomeLoc onlyAtThisLoc) { + return addValues(names, type, new ArrayList(), onlyAtThisLoc, true, false); + } + @Deprecated + public T getFirstValue(final Class type, final String name) { + return safeGetFirst(getValues(type, name)); + } + @Deprecated + public T getFirstValue(final Class type, final String name, final GenomeLoc onlyAtThisLoc) { + return safeGetFirst(getValues(type, name, onlyAtThisLoc)); + } /** * Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records @@ -195,6 +265,7 @@ public class RefMetaDataTracker { * * @return collection of all rods */ + @Deprecated public List getAllValuesAsGATKFeatures() { List l = new ArrayList(); for ( RODRecordList rl : map.values() ) { @@ -204,6 +275,12 @@ public class RefMetaDataTracker { return l; } + @Deprecated + public List getValuesAsGATKFeatures(final RodBinding rodBinding) { + return getValuesAsGATKFeatures(rodBinding.getName()); + } + + /** * get all the GATK features associated with a specific track name * @param name the name of the track we're looking for @@ -211,6 +288,7 @@ public class RefMetaDataTracker { * * Important: The list returned by this function is guaranteed not to be null, but may be empty! */ + @Deprecated public List getValuesAsGATKFeatures(final String name) { return getTrackDataByName(name); } @@ -264,33 +342,6 @@ public class RefMetaDataTracker { } - /** - * get a singleton record, given the name and a type. This function will return the first record at the - * current position seen. The object is cast into a type clazz, or thoses an error if this isn't possible. - * - * * WARNING: we now suppport more than one RMD at a single position for all tracks. If there are - * are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets - * picked may change from time to time! BE WARNED! - * - * @param name the name of the track - * @param clazz the underlying type to return - * @param the type to parameterize on, matching the clazz argument - * @return a record of type T, or null if no record is present. - */ - @Deprecated - public T getFirstValue(final String name, final Class clazz) { - RODRecordList objects = getTrackDataByName(name); - - if (objects.isEmpty()) return null; - - Object obj = objects.get(0).getUnderlyingObject(); - if (!(clazz.isAssignableFrom(obj.getClass()))) - throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() - + " it's of type " + obj.getClass()); - else - return (T)obj; - } - // ------------------------------------------------------------------------------------------ // // @@ -301,13 +352,15 @@ public class RefMetaDataTracker { private List addValues(final Collection names, final Class type, - final List values, + List values, final GenomeLoc curLocation, final boolean requireStartHere, final boolean takeFirstOnly ) { for ( String name : names ) { RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match - addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); + values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); + if ( takeFirstOnly && ! values.isEmpty() ) + break; } return values; @@ -317,7 +370,7 @@ public class RefMetaDataTracker { private List addValues(final String name, final Class type, - final List values, + List values, final RODRecordList rodList, final GenomeLoc curLocation, final boolean requireStartHere, @@ -329,15 +382,23 @@ public class RefMetaDataTracker { throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString() + " it's of type " + obj.getClass()); - values.add((T)obj); + T objT = (T)obj; + if ( takeFirstOnly ) { + if ( values == null ) + values = Arrays.asList(objT); + else + values.add(objT); - if ( takeFirstOnly ) - // we only want the first passing instance, so break the loop over records in rodList break; + } else { + if ( values == null ) + values = new ArrayList(); + values.add(objT); + } } } - return values; + return values == null ? Collections.emptyList() : values; } /** @@ -358,6 +419,10 @@ public class RefMetaDataTracker { return l == null ? EMPTY_ROD_RECORD_LIST : l; } + private RODRecordList getTrackDataByName(final RodBinding binding) { + return getTrackDataByName(binding.getName()); + } + /** * Returns the canonical name of the rod name (lowercases it) * @param name the name of the rod diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java index 0054354c7..bd25a73e0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupWalker.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -45,8 +47,11 @@ import java.util.Arrays; * each overlapping read, and quality score) to the reference pileup data generated by samtools. Samtools' pileup data * should be specified using the command-line argument '-B pileup,SAMPileup,'. */ -@Requires(value={DataSource.READS,DataSource.REFERENCE},referenceMetaData=@RMD(name="pileup",type=SAMPileupFeature.class)) +@Requires(value={DataSource.READS,DataSource.REFERENCE}) public class ValidatingPileupWalker extends LocusWalker implements TreeReducible { + @Input(fullName = "pileup", doc="The SAMPileup containing the expected output", required = true) + RodBinding pileup; + @Output private PrintStream out; @@ -130,17 +135,17 @@ public class ValidatingPileupWalker extends LocusWalker { + @Input(fullName = "ProbeIntervals", doc="Chris document me", required=true) + RodBinding probeIntervals; + + @Input(fullName = "ValidateAlleles", doc="Chris document me", required=true) + RodBinding validateAlleles; + + @Input(fullName = "MaskAlleles", doc="Chris document me", required=true) + RodBinding maskAlleles; + @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) boolean lowerCaseSNPs = false; @@ -99,9 +109,9 @@ public class ValidationAmplicons extends RodWalker { } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null || ! tracker.hasValues("ProbeIntervals")) { return null; } + if ( tracker == null || ! tracker.hasValues(probeIntervals)) { return null; } - TableFeature feature = tracker.getFirstValue("ProbeIntervals", TableFeature.class); + TableFeature feature = tracker.getFirstValue(probeIntervals); GenomeLoc interval = feature.getLocation(); //logger.debug(interval); if ( prevInterval == null || ! interval.equals(prevInterval) ) { @@ -138,8 +148,8 @@ public class ValidationAmplicons extends RodWalker { // step 3 (or 1 if not new): // build up the sequence - VariantContext mask = tracker.getFirstValue(VariantContext.class, "MaskAlleles",ref.getLocus()); - VariantContext validate = tracker.getFirstValue(VariantContext.class, "ValidateAlleles",ref.getLocus()); + VariantContext mask = tracker.getFirstValue(maskAlleles, ref.getLocus()); + VariantContext validate = tracker.getFirstValue(validateAlleles,ref.getLocus()); if ( mask == null && validate == null ) { if ( indelCounter > 0 ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index 52d8fd4d0..dbfaedc1b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -88,7 +88,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null); - TableFeature datum = tracker.getFirstValue("tableTest", TableFeature.class); + TableFeature datum = tracker.getFirstValue(TableFeature.class, "tableTest"); Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect"); Assert.assertEquals(datum.get("COL2"),"D","datum parameter for COL2 is incorrect"); @@ -114,13 +114,13 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null); - TableFeature datum1 = tracker.getFirstValue("tableTest1", TableFeature.class); + TableFeature datum1 = tracker.getFirstValue(TableFeature.class, "tableTest1"); Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect"); Assert.assertEquals(datum1.get("COL2"),"D","datum1 parameter for COL2 is incorrect"); Assert.assertEquals(datum1.get("COL3"),"E","datum1 parameter for COL3 is incorrect"); - TableFeature datum2 = tracker.getFirstValue("tableTest2", TableFeature.class); + TableFeature datum2 = tracker.getFirstValue(TableFeature.class, "tableTest2"); Assert.assertEquals(datum2.get("COL1"),"C","datum2 parameter for COL1 is incorrect"); Assert.assertEquals(datum2.get("COL2"),"D","datum2 parameter for COL2 is incorrect"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index afb6e418a..4c5bdbcda 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -76,6 +76,11 @@ public class RefMetaDataTrackerUnitTest { span10_20 = makeSpan(10, 20); } + @BeforeMethod + public void reset() { + RodBinding.resetNameCounter(); + } + private class MyTest extends BaseTest.TestDataProvider { public RODRecordList AValues, BValues; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 5a6a66bbd..84e52f037 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -14,7 +14,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -22,7 +22,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("964f1016ec9a3c55333f62dd834c14d6")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -30,7 +30,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("8e7de435105499cd71ffc099e268a83e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -38,7 +38,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("64b6804cb1e27826e3a47089349be581")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -46,7 +46,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -54,7 +54,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -62,7 +62,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -70,7 +70,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("09f8e840770a9411ff77508e0ed0837f")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -78,7 +78,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" -B:variant,VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, + baseTestString() + " -G \"Standard\" --variants:VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, Arrays.asList("78d2c19f8107d865970dbaf3e12edd92")); executeTest("test overwriting header", spec); } @@ -86,7 +86,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, + baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("16e3a1403fc376320d7c69492cad9345")); executeTest("not passing it any reads", spec); } @@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, + baseTestString() + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } @@ -102,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, + baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -110,7 +110,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1, + baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } @@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -A HomopolymerRun -B:variant,VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1, + baseTestString() + " -A HomopolymerRun --variants:VCF " + validationDataLocation + "/" + file + " -BTI variants -NO_HEADER", 1, Arrays.asList(MD5)); executeTest("Testing lookup vcf tabix vs. vcf tribble", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 85b7ea8e7..05c0c0982 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -86,7 +86,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variants:VCF3 " + validationDataLocation + "twoDeletions.vcf", 1, + baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variants:VCF " + validationDataLocation + "twoDeletions.vcf", 1, Arrays.asList("569546fd798afa0e65c5b61b440d07ac")); executeTest("test deletions", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 741e0bd17..ae64ba6f8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -17,7 +17,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; - String test1 = baseCommand + "-T VariantAnnotator -BTI variant --variants:vcf " + testVCF; + String test1 = baseCommand + "-T VariantAnnotator -BTI variants --variants:vcf " + testVCF; WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); From f62f47d476bbc728a7ff8742e96c34558d14de78 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 14:27:07 -0400 Subject: [PATCH 096/186] Not sure why this didn't fail before, but bringing VE up to date with previous changes --- .../sting/utils/variantcontext/VariantContext.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 3ea1bb5d6..1712f6f7b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -440,7 +440,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return vc subcontext */ public VariantContext subContextFromGenotypes(Collection genotypes, Set alleles) { - return new VariantContext(getSource(), contig, start, stop, alleles, genotypes, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes()); + return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel()); } From 38efd3066ceb91791e1629170eea1a66b2f9bbcb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 14:58:18 -0400 Subject: [PATCH 097/186] Bug fix for mask RodBinding --- .../sting/gatk/walkers/filters/VariantFiltrationWalker.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 4eaed4840..cad3569e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -55,6 +55,9 @@ public class VariantFiltrationWalker extends RodWalker { @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; + @Input(fullName="mask", doc="Input ROD mask", required=false) + public RodBinding mask = RodBinding.makeUnbound(Feature.class); + @Output(doc="File to which variants should be written", required=true) protected VCFWriter writer = null; @@ -77,8 +80,6 @@ public class VariantFiltrationWalker extends RodWalker { protected Integer MASK_EXTEND = 0; @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) protected String MASK_NAME = "Mask"; - @Input(fullName="mask", doc="Input ROD mask", required=false) - public RodBinding mask; @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false) protected Boolean FAIL_MISSING_VALUES = false; From f6563c0f9f9434501d5e59ff13f20b556c2eb47a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 15:36:55 -0400 Subject: [PATCH 098/186] Removed support for RMD in @Requires and @Allows Merge as well Conflicts: private/java/src/org/broadinstitute/sting/gatk/walkers/qc/TestVariantContextWalker.java public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java --- .../sting/gatk/GenomeAnalysisEngine.java | 28 --- .../sting/gatk/WalkerManager.java | 36 +-- .../sting/gatk/walkers/Allows.java | 1 - .../walkers/genotyper/UGCalcLikelihoods.java | 2 +- .../gatk/walkers/phasing/MergeMNPsWalker.java | 2 +- ...ergeSegregatingAlternateAllelesWalker.java | 2 +- .../walkers/phasing/PhaseByTransmission.java | 3 +- .../phasing/ReadBackedPhasingWalker.java | 2 +- .../walkers/variantutils/TestRodBindings.java | 228 ------------------ .../VariantValidationAssessor.java | 2 +- 10 files changed, 9 insertions(+), 297 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c5b2a840c..b0c4e203b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -370,33 +370,6 @@ public class GenomeAnalysisEngine { throw new ArgumentException("Walker does not allow a reference but one was provided."); } - /** - * Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not - * 'allowed' is still present. - * - * @param rods Reference-ordered data to load. - */ - protected void validateSuppliedReferenceOrderedData(List rods) { - // Check to make sure that all required metadata is present. - List allRequired = WalkerManager.getRequiredMetaData(walker); - for (RMD required : allRequired) { - boolean found = false; - for (ReferenceOrderedDataSource rod : rods) { - if (rod.matchesNameAndRecordType(required.name(), required.type())) - found = true; - } - if (!found) - throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " + - "Please supply the specified metadata file.", required.name(), required.type().getSimpleName())); - } - - // Check to see that no forbidden rods are present. - for (ReferenceOrderedDataSource rod : rods) { - if (!WalkerManager.isAllowed(walker, rod)) - throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName())); - } - } - protected void validateSuppliedIntervals() { // Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped. if(!(walker instanceof ReadWalker)) { @@ -936,7 +909,6 @@ public class GenomeAnalysisEngine { flashbackData())); // validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match. - validateSuppliedReferenceOrderedData(dataSources); validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder); return dataSources; diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java index 6aeb42faa..f053c299c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java @@ -177,19 +177,7 @@ public class WalkerManager extends PluginManager { * @return The list of allowed reference meta data. */ public static List getAllowsMetaData(Class walkerClass) { - Allows allowsDataSource = getWalkerAllowed(walkerClass); - if (allowsDataSource == null) - return Collections.emptyList(); - return Arrays.asList(allowsDataSource.referenceMetaData()); - } - - /** - * Get a list of RODs allowed by the walker. - * @param walker Walker to query. - * @return The list of allowed reference meta data. - */ - public static List getAllowsMetaData(Walker walker) { - return getAllowsMetaData(walker.getClass()); + return Collections.emptyList(); } /** @@ -226,24 +214,7 @@ public class WalkerManager extends PluginManager { * @return True if the walker forbids this data type. False otherwise. */ public static boolean isAllowed(Class walkerClass, ReferenceOrderedDataSource rod) { - Allows allowsDataSource = getWalkerAllowed(walkerClass); - - // Allows is less restrictive than requires. If an allows - // clause is not specified, any kind of data is allowed. - if( allowsDataSource == null ) - return true; - - // The difference between unspecified RMD and the empty set of metadata can't be detected. - // Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this - // case in the future. - if( allowsDataSource.referenceMetaData().length == 0 ) - return true; - - for( RMD allowed: allowsDataSource.referenceMetaData() ) { - if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) ) - return true; - } - return false; + return true; } /** @@ -283,8 +254,7 @@ public class WalkerManager extends PluginManager { * @return The list of required reference meta data. */ public static List getRequiredMetaData(Class walkerClass) { - Requires requiresDataSource = getWalkerRequirements(walkerClass); - return Arrays.asList(requiresDataSource.referenceMetaData()); + return Collections.emptyList(); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java index 2541921e9..80cb30598 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Allows.java @@ -23,5 +23,4 @@ import java.lang.annotation.*; @Target(ElementType.TYPE) public @interface Allows { DataSource[] value(); - RMD[] referenceMetaData() default {}; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index e5e78905f..4f78fab36 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -49,7 +49,7 @@ import java.util.TreeSet; * the name 'allele' so we know which alternate allele to use at each site. */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@Requires(value={},referenceMetaData=@RMD(name="alleles", type= VariantContext.class)) +@Requires(value={}) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.READS) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java index de62f9652..14f54ec7d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsWalker.java @@ -46,7 +46,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr * Walks along all variant ROD loci, and merges consecutive sites if they segregate in all samples in the ROD. */ @Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) +@Requires(value = {DataSource.REFERENCE}) @By(DataSource.REFERENCE_ORDERED_DATA) public class MergeMNPsWalker extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java index bf26b327d..e1be5e5c5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesWalker.java @@ -49,7 +49,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr * Walks along all variant ROD loci, and merges consecutive sites if some sample has segregating alt alleles in the ROD. */ @Allows(value = {DataSource.REFERENCE}) -@Requires(value = {DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) +@Requires(value = {DataSource.REFERENCE}) @By(DataSource.REFERENCE_ORDERED_DATA) public class MergeSegregatingAlternateAllelesWalker extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index fab4f2253..0178cdfaf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -311,8 +311,7 @@ public class PhaseByTransmission extends RodWalker { VariantContext newvc = VariantContext.modifyGenotypes(vc, genotypeMap); - vcfWriter.add(newvc); - } + vcfWriter.add(newvc); } return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index dd7c68247..8e62999b5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -56,7 +56,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr * Walks along all variant ROD loci, caching a user-defined window of VariantContext sites, and then finishes phasing them when they go out of range (using upstream and downstream reads). */ @Allows(value = {DataSource.READS, DataSource.REFERENCE}) -@Requires(value = {DataSource.READS, DataSource.REFERENCE}, referenceMetaData = @RMD(name = "variant", type = ReferenceOrderedDatum.class)) +@Requires(value = {DataSource.READS, DataSource.REFERENCE}) @By(DataSource.READS) @ReadFilters({MappingQualityZeroReadFilter.class}) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java deleted file mode 100644 index 9ca7b89a6..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/TestRodBindings.java +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.variantutils; - -import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.io.PrintStream; -import java.util.*; - -/** - * Emits specific fields as dictated by the user from one or more VCF files. - */ -public class TestRodBindings extends RodWalker { - @Output(doc="File to which results should be written",required=true) - protected PrintStream out; - - @Argument(fullName="fields", shortName="F", doc="Fields to emit from the VCF, allows any VCF field, any info field, and some meta fields like nHets", required=true) - public ArrayList fieldsToTake = new ArrayList(); - - @Argument(fullName="showFiltered", shortName="raw", doc="Include filtered records") - public boolean showFiltered = false; - - @Argument(fullName="maxRecords", shortName="M", doc="Maximum number of records to emit, if provided", required=false) - public int MAX_RECORDS = -1; - int nRecords = 0; - - @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) - public boolean keepMultiAllelic = false; - - @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false) - public boolean ALLOW_MISSING_DATA = false; - - @Input(fullName="variants", shortName="V", doc="The variant file we will convert to a table", required=true) - public RodBinding variants; - - @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) - public List> variantsList; - - public void initialize() { - out.println(Utils.join("\t", fieldsToTake)); - } - - public static abstract class Getter { public abstract String get(VariantContext vc); } - public static Map getters = new HashMap(); - - static { - // #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT - getters.put("CHROM", new Getter() { public String get(VariantContext vc) { return vc.getChr(); } }); - getters.put("POS", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getStart()); } }); - getters.put("REF", new Getter() { - public String get(VariantContext vc) { - String x = ""; - if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { - Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); - x=x+new String(new byte[]{refByte}); - } - return x+vc.getReference().getDisplayString(); - } - }); - getters.put("ALT", new Getter() { - public String get(VariantContext vc) { - StringBuilder x = new StringBuilder(); - int n = vc.getAlternateAlleles().size(); - if ( n == 0 ) return "."; - if (vc.hasAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)) { - Byte refByte = (Byte)(vc.getAttribute(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY)); - x.append(new String(new byte[]{refByte})); - } - - for ( int i = 0; i < n; i++ ) { - if ( i != 0 ) x.append(","); - x.append(vc.getAlternateAllele(i).getDisplayString()); - } - return x.toString(); - } - }); - getters.put("QUAL", new Getter() { public String get(VariantContext vc) { return Double.toString(vc.getPhredScaledQual()); } }); - getters.put("TRANSITION", new Getter() { public String get(VariantContext vc) { - if ( vc.isSNP() && vc.isBiallelic() ) - return VariantContextUtils.isTransition(vc) ? "1" : "0"; - else - return "-1"; - }}); - getters.put("FILTER", new Getter() { public String get(VariantContext vc) { - return vc.isNotFiltered() ? "PASS" : Utils.join(",", vc.getFilters()); } - }); - - getters.put("HET", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount()); } }); - getters.put("HOM-REF", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomRefCount()); } }); - getters.put("HOM-VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHomVarCount()); } }); - getters.put("NO-CALL", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNoCallCount()); } }); - getters.put("VAR", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getHetCount() + vc.getHomVarCount()); } }); - getters.put("NSAMPLES", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples()); } }); - getters.put("NCALLED", new Getter() { public String get(VariantContext vc) { return Integer.toString(vc.getNSamples() - vc.getNoCallCount()); } }); - getters.put("GQ", new Getter() { public String get(VariantContext vc) { - if ( vc.getNSamples() > 1 ) throw new UserException("Cannot get GQ values for multi-sample VCF"); - return String.format("%.2f", 10 * vc.getGenotype(0).getNegLog10PError()); - }}); - } - - - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) // RodWalkers can make funky map calls - return 0; - - for ( RodBinding binding : variantsList ) - System.out.printf("VariantList binding %s tags=%s%n", binding, binding.getTags().getPositionalTags()); - - if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - VariantContext vc = tracker.getFirstValue(variants, context.getLocation()); - if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { - List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); - out.println(Utils.join("\t", vals)); - } - return 1; - } else { - if ( nRecords >= MAX_RECORDS ) { - logger.warn("Calling sys exit to leave after " + nRecords + " records"); - System.exit(0); // todo -- what's the recommend way to abort like this? - } - return 0; - } - } - - private static final boolean isWildCard(String s) { - return s.endsWith("*"); - } - - public static List extractFields(VariantContext vc, List fields, boolean allowMissingData) { - List vals = new ArrayList(); - - for ( String field : fields ) { - String val = "NA"; - - if ( getters.containsKey(field) ) { - val = getters.get(field).get(vc); - } else if ( vc.hasAttribute(field) ) { - val = vc.getAttributeAsString(field); - } else if ( isWildCard(field) ) { - Set wildVals = new HashSet(); - for ( Map.Entry elt : vc.getAttributes().entrySet()) { - if ( elt.getKey().startsWith(field.substring(0, field.length() - 1)) ) { - wildVals.add(elt.getValue().toString()); - } - } - - if ( wildVals.size() > 0 ) { - List toVal = new ArrayList(wildVals); - Collections.sort(toVal); - val = Utils.join(",", toVal); - } - } else if ( ! allowMissingData ) { - throw new UserException(String.format("Missing field %s in vc %s at %s", field, vc.getSource(), vc)); - } - - if (field.equals("AF") || field.equals("AC")) { - String afo = val; - - double af=0; - if (afo.contains(",")) { - String[] afs = afo.split(","); - afs[0] = afs[0].substring(1,afs[0].length()); - afs[afs.length-1] = afs[afs.length-1].substring(0,afs[afs.length-1].length()-1); - - double[] afd = new double[afs.length]; - - for (int k=0; k < afd.length; k++) - afd[k] = Double.valueOf(afs[k]); - - af = MathUtils.arrayMax(afd); - //af = Double.valueOf(afs[0]); - - } - else - if (!afo.equals("NA")) - af = Double.valueOf(afo); - - val = Double.toString(af); - - } - vals.add(val); - } - - return vals; - } - - public Integer reduceInit() { - return 0; - } - - public Integer reduce(Integer counter, Integer sum) { - return counter + sum; - } - - public void onTraversalDone(Integer sum) {} -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index c6f965423..218f5377e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -48,7 +48,7 @@ import java.util.*; */ @Reference(window=@Window(start=0,stop=40)) @Requires(value={}) -public class VariantValidationAssessor extends RodWalker,Integer> { +public class VariantValidationAssessor extends RodWalker { @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; From 8981367307b5634138f9bc70573623295e259722 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 3 Aug 2011 15:48:28 -0400 Subject: [PATCH 099/186] Updating memory usage for picard programs --- .../qscripts/DataProcessingPipeline.scala | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 959d073c7..d1e734cbb 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -2,7 +2,6 @@ package org.broadinstitute.sting.queue.qscripts import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript -import org.broadinstitute.sting.queue.function.ListWriterFunction import org.broadinstitute.sting.queue.extensions.picard._ import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode @@ -12,6 +11,7 @@ import net.sf.samtools.SAMFileReader import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.util.QScriptUtils +import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction} class DataProcessingPipeline extends QScript { qscript => @@ -283,12 +283,6 @@ class DataProcessingPipeline extends QScript { ****************************************************************************/ - // General arguments to GATK walkers - trait CommandLineGATKArgs extends CommandLineGATK { - this.reference_sequence = qscript.reference - this.memoryLimit = 4 - this.isIntermediate = true - } // General arguments to non-GATK tools trait ExternalCommonArgs extends CommandLineFunction { @@ -296,6 +290,14 @@ class DataProcessingPipeline extends QScript { this.isIntermediate = true } + // General arguments to GATK walkers + trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs { + this.reference_sequence = qscript.reference + } + + trait SAMargs extends PicardBamFunction with ExternalCommonArgs { + this.maxRecordsInRam = 100000 + } case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) @@ -393,7 +395,6 @@ class DataProcessingPipeline extends QScript { case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs { this.input = List(inBam) this.output = outLog - this.maxRecordsInRam = 100000 this.REFERENCE_SEQUENCE = qscript.reference this.isIntermediate = false this.analysisName = queueLogDir + outLog + ".validate" @@ -412,8 +413,6 @@ class DataProcessingPipeline extends QScript { this.RGPL = readGroup.pl this.RGPU = readGroup.pu this.RGSM = readGroup.sm - this.memoryLimit = 4 - this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".rg" this.jobName = queueLogDir + outBam + ".rg" } @@ -439,6 +438,7 @@ class DataProcessingPipeline extends QScript { @Input(doc="bwa alignment index file") var sai = inSai @Output(doc="output aligned bam file") var alignedBam = outBam def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam + this.memoryLimit = 6 this.analysisName = queueLogDir + outBam + ".bwa_sam_se" this.jobName = queueLogDir + outBam + ".bwa_sam_se" } @@ -449,6 +449,7 @@ class DataProcessingPipeline extends QScript { @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 @Output(doc="output aligned bam file") var alignedBam = outBam def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam + this.memoryLimit = 6 this.analysisName = queueLogDir + outBam + ".bwa_sam_pe" this.jobName = queueLogDir + outBam + ".bwa_sam_pe" } @@ -459,6 +460,4 @@ class DataProcessingPipeline extends QScript { this.analysisName = queueLogDir + outBamList + ".bamList" this.jobName = queueLogDir + outBamList + ".bamList" } - - } From 800bb97f0b3951f4f7d074712fcd0630a504cf38 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 16:04:51 -0400 Subject: [PATCH 100/186] Removed getFeaturesAsGATKFeature and created createGenomeLoc(Feature) in genomeLocParser Updated all walkers that used the now deleted methods. --- .../gatk/refdata/RefMetaDataTracker.java | 114 +++++++++++++----- .../sting/utils/GenomeLocParser.java | 23 ++-- .../refdata/RefMetaDataTrackerUnitTest.java | 8 -- 3 files changed, 92 insertions(+), 53 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index 73685d758..ab6ce9ce9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -56,7 +56,7 @@ public class RefMetaDataTracker { else { Map tmap = new HashMap(allBindings.size()); for ( RODRecordList rod : allBindings ) { - if ( rod != null ) + if ( rod != null && ! rod.isEmpty() ) tmap.put(canonicalName(rod.getName()), rod); } @@ -141,17 +141,37 @@ public class RefMetaDataTracker { @Requires({"type != null", "onlyAtThisLoc != null"}) public T getFirstValue(final Class type, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(getValues(type, onlyAtThisLoc)); + } - // - // ROD binding accessors - // + /** + * Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as + * a list of specific type T extending Feature. + * + * Note that this function assumes that all of the bound features are instances of or + * subclasses of T. A ClassCastException will occur if this isn't the case. + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ @Requires({"rodBinding != null"}) @Ensures("result != null") public List getValues(final RodBinding rodBinding) { return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), null, false, false); } + /** + * Gets all of the Tribble features bound to any RodBinding in rodBindings, + * spanning this locus, returning them as a list of specific type T extending Feature. + * + * Note that this function assumes that all of the bound features are instances of or + * subclasses of T. A ClassCastException will occur if this isn't the case. + * + * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ @Requires({"rodBindings != null"}) @Ensures("result != null") public List getValues(final Collection> rodBindings) { @@ -161,12 +181,28 @@ public class RefMetaDataTracker { return results; } + /** + * The same logic as @link #getValues(RodBinding) but enforces that each Feature start at onlyAtThisLoc + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ @Requires({"rodBinding != null", "onlyAtThisLoc != null"}) @Ensures("result != null") public List getValues(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false); } + /** + * The same logic as @link #getValues(List) but enforces that each Feature start at onlyAtThisLoc + * + * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched + * @param The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A freshly allocated list of all of the bindings, or an empty list if none are bound. + */ @Requires({"rodBindings != null", "onlyAtThisLoc != null"}) @Ensures("result != null") public List getValues(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { @@ -176,16 +212,44 @@ public class RefMetaDataTracker { return results; } + /** + * Uses the same logic as @getValues(RodBinding) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param as above + * @return A random single element the eligible Features found, or null if none are bound. + */ @Requires({"rodBinding != null"}) public T getFirstValue(final RodBinding rodBinding) { return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true)); } + /** + * Uses the same logic as @getValues(RodBinding, GenomeLoc) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched + * @param as above + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A random single element the eligible Features found, or null if none are bound. + */ @Requires({"rodBinding != null", "onlyAtThisLoc != null"}) public T getFirstValue(final RodBinding rodBinding, final GenomeLoc onlyAtThisLoc) { return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true)); } + /** + * Uses the same logic as @getValues(List) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched + * @param as above + * @return A random single element the eligible Features found, or null if none are bound. + */ @Requires({"rodBindings != null"}) public T getFirstValue(final Collection> rodBindings) { for ( RodBinding rodBinding : rodBindings ) { @@ -196,6 +260,16 @@ public class RefMetaDataTracker { return null; } + /** + * Uses the same logic as @getValues(RodBinding,GenomeLoc) to determine the list + * of eligible Features and select a single element from the resulting set + * of eligible features. + * + * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched + * @param as above + * @param onlyAtThisLoc only Features starting at this site are considered + * @return A random single element the eligible Features found, or null if none are bound. + */ @Requires({"rodBindings != null", "onlyAtThisLoc != null"}) public T getFirstValue(final Collection> rodBindings, final GenomeLoc onlyAtThisLoc) { for ( RodBinding rodBinding : rodBindings ) { @@ -275,24 +349,6 @@ public class RefMetaDataTracker { return l; } - @Deprecated - public List getValuesAsGATKFeatures(final RodBinding rodBinding) { - return getValuesAsGATKFeatures(rodBinding.getName()); - } - - - /** - * get all the GATK features associated with a specific track name - * @param name the name of the track we're looking for - * @return a list of GATKFeatures for the target rmd - * - * Important: The list returned by this function is guaranteed not to be null, but may be empty! - */ - @Deprecated - public List getValuesAsGATKFeatures(final String name) { - return getTrackDataByName(name); - } - /** * Get all of the RMD tracks at the current site. Each track is returned as a single compound * object (RODRecordList) that may contain multiple RMD records associated with the current site. @@ -300,13 +356,7 @@ public class RefMetaDataTracker { * @return List of all tracks */ public List getBoundRodTracks() { - LinkedList bound = new LinkedList(); - - for ( RODRecordList value : map.values() ) { - if ( value.size() != 0 ) bound.add(value); - } - - return bound; + return new ArrayList(map.values()); } /** @@ -361,10 +411,10 @@ public class RefMetaDataTracker { values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly ); if ( takeFirstOnly && ! values.isEmpty() ) break; - } + } - return values; - } + return values; + } diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index a5c6e0537..8d9768681 100644 --- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -34,6 +34,7 @@ import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -443,6 +444,15 @@ public class GenomeLocParser { } } + /** + * Creates a GenomeLoc from a Tribble feature + * @param feature + * @return + */ + public GenomeLoc createGenomeLoc(final Feature feature) { + return createGenomeLoc(feature.getChr(), feature.getStart(), feature.getEnd()); + } + /** * create a new genome loc, given the contig name, and a single position. Must be on the reference * @@ -457,19 +467,6 @@ public class GenomeLocParser { return createGenomeLoc(contig, getContigIndex(contig), pos, pos); } -// /** -// * Creates a new GenomeLoc without performing any validation on its contig or bounds. -// * FOR UNIT TESTING PURPOSES ONLY! -// * -// * @param contig the contig name -// * @param start start position of the interval -// * @param stop stop position of the interval -// * @return a new GenomeLoc representing the specified location -// */ -// public GenomeLoc createGenomeLocWithoutValidation( String contig, int start, int stop ) { -// return new GenomeLoc(contig, getContigIndexWithoutException(contig), start, stop); -// } - /** * create a new genome loc from an existing loc, with a new start position * Note that this function will NOT explicitly check the ending offset, in case someone wants to diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index 4c5bdbcda..ec05ae2a1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -181,20 +181,12 @@ public class RefMetaDataTrackerUnitTest { private void testSimpleBindings(String name, RefMetaDataTracker tracker, RODRecordList expected) { List asValues = tracker.getValues(Feature.class, name); - List asFeatures = tracker.getValuesAsGATKFeatures(name); Assert.assertEquals(tracker.hasValues(name), expected != null); - Assert.assertEquals(asFeatures.size(), expected == null ? 0 : expected.size()); Assert.assertEquals(asValues.size(), expected == null ? 0 : expected.size()); if ( expected != null ) { for ( GATKFeature e : expected ) { - boolean foundFeature = false; - for ( GATKFeature f : asFeatures ) { - if ( e.getUnderlyingObject() == f.getUnderlyingObject() ) foundFeature = true; - } - Assert.assertTrue(foundFeature, "Never found expected GATKFeature " + e + " bound to " + name + " in " + tracker); - boolean foundValue = false; for ( Feature f : asValues ) { if ( e.getUnderlyingObject() == f ) foundValue = true; From 8f696c7731f9dc603ae2bde219eb1f6af9567a6f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 17:19:28 -0400 Subject: [PATCH 101/186] Continuing progress towards RodBinding 1.0 -- Cleaning up old interface to RMDT, docs and contracts added -- Proper type checking for RodBinding for cases where the Tribble type isn't found or is the wrong type --- .../gatk/refdata/RefMetaDataTracker.java | 104 ++++-------- .../gatk/refdata/features/DbSNPHelper.java | 9 +- .../gatk/refdata/tracks/RMDTrackBuilder.java | 18 +- .../sting/gatk/walkers/PrintRODsWalker.java | 8 +- .../annotator/VariantAnnotatorEngine.java | 5 +- .../coverage/CompareCallableLociWalker.java | 3 +- .../filters/VariantFiltrationWalker.java | 8 +- .../sting/gatk/walkers/qc/CountIntervals.java | 3 +- .../walkers/qc/RodSystemValidationWalker.java | 155 ------------------ .../recalibration/CountCovariatesWalker.java | 10 +- .../variantutils/ValidateVariants.java | 3 +- .../walkers/variantutils/VariantsToVCF.java | 7 +- .../sting/utils/exceptions/UserException.java | 7 + .../sting/utils/text/ListFileUtils.java | 6 +- .../gatk/EngineFeaturesIntegrationTest.java | 13 +- .../ReferenceOrderedViewUnitTest.java | 3 +- .../refdata/RefMetaDataTrackerUnitTest.java | 5 +- ...astaAlternateReferenceIntegrationTest.java | 2 +- .../PhaseByTransmissionIntegrationTest.java | 3 +- .../qc/ValidatingPileupIntegrationTest.java | 2 +- .../ValidationAmpliconsIntegrationTest.java | 12 +- 21 files changed, 107 insertions(+), 279 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index ab6ce9ce9..b9aaf47de 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -292,21 +292,33 @@ public class RefMetaDataTracker { } /** - * Helper function for getFirst() operations that takes a list of and - * returns the first element, or null if no such element exists. + * Get all of the RMD tracks at the current site. Each track is returned as a single compound + * object (RODRecordList) that may contain multiple RMD records associated with the current site. * - * @param l - * @param - * @return + * @return List of all tracks */ - @Requires({"l != null"}) - final private T safeGetFirst(final List l) { - return l.isEmpty() ? null : l.get(0); + public List getBoundRodTracks() { + return new ArrayList(map.values()); } + /** + * The number of tracks with at least one value bound here + * @return the number of tracks with at least one bound Feature + */ + public int getNTracksWithBoundFeatures() { + return map.size(); + } + + // ------------------------------------------------------------------------------------------ // - // Deprecated accessors -- will be removed // + // old style accessors + // + // TODO -- DELETE ME + // + // + // ------------------------------------------------------------------------------------------ + @Deprecated public boolean hasValues(final String name) { return map.containsKey(canonicalName(name)); @@ -333,73 +345,27 @@ public class RefMetaDataTracker { return safeGetFirst(getValues(type, name, onlyAtThisLoc)); } - /** - * Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records - * at the current site, they all will be added to the list as separate elements. - * - * @return collection of all rods - */ - @Deprecated - public List getAllValuesAsGATKFeatures() { - List l = new ArrayList(); - for ( RODRecordList rl : map.values() ) { - if ( rl != null ) - l.addAll(rl); - } - return l; - } + // ------------------------------------------------------------------------------------------ + // + // + // Private utility functions + // + // + // ------------------------------------------------------------------------------------------ /** - * Get all of the RMD tracks at the current site. Each track is returned as a single compound - * object (RODRecordList) that may contain multiple RMD records associated with the current site. + * Helper function for getFirst() operations that takes a list of and + * returns the first element, or null if no such element exists. * - * @return List of all tracks - */ - public List getBoundRodTracks() { - return new ArrayList(map.values()); - } - - /** - * The number of tracks with at least one value bound here + * @param l + * @param * @return */ - public int getNumberOfTracksWithValue() { - int n = 0; - for ( RODRecordList value : map.values() ) { - if ( ! value.isEmpty() ) { - n++; - } - } - return n; + @Requires({"l != null"}) + final private T safeGetFirst(final List l) { + return l.isEmpty() ? null : l.get(0); } - // ------------------------------------------------------------------------------------------ - // - // - // old style Generic accessors - // - // TODO -- DELETE ME - // - // - // ------------------------------------------------------------------------------------------ - - /** - * No-assumption version of getValues(name, class). Returns Objects. - */ - @Deprecated - public List getValues(final String name) { - return (List)(List)getValues(Feature.class, name); - } - - - // ------------------------------------------------------------------------------------------ - // - // - // VariantContext helpers - // - // - // ------------------------------------------------------------------------------------------ - private List addValues(final Collection names, final Class type, List values, diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java index 675ed98fb..f62a157f0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.features; import net.sf.samtools.util.SequenceUtil; +import org.broad.tribble.Feature; import org.broad.tribble.annotation.Strand; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.utils.Utils; @@ -58,12 +59,12 @@ public class DbSNPHelper { return dbsnp; } - public static String rsIDOfFirstRealSNP(List featureList) { + public static String rsIDOfFirstRealSNP(List featureList) { if (featureList == null) return null; String rsID = null; - for ( Object d : featureList ) { + for ( Feature d : featureList ) { if ( d instanceof DbSNPFeature ) { if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) { rsID = ((DbSNPFeature)d).getRsID(); @@ -80,12 +81,12 @@ public class DbSNPHelper { return rsID; } - public static String rsIDOfFirstRealIndel(List featureList) { + public static String rsIDOfFirstRealIndel(List featureList) { if (featureList == null) return null; String rsID = null; - for ( Object d : featureList ) { + for ( Feature d : featureList ) { if ( d instanceof DbSNPFeature ) { if ( DbSNPHelper.isIndel((DbSNPFeature) d) ) { rsID = ((DbSNPFeature)d).getRsID(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index 248c454d7..f285f1263 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SequenceDictionaryUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -133,7 +134,7 @@ public class RMDTrackBuilder extends PluginManager { HashMap classToRecord = new HashMap(); for (String name: this.getPluginsByName().keySet()) { FeatureCodec codec = this.createByName(name); - classToRecord.put(name, codec.getFeatureType()); + classToRecord.put(name.toUpperCase(), codec.getFeatureType()); } return classToRecord; } @@ -142,10 +143,25 @@ public class RMDTrackBuilder extends PluginManager { return getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); } + /** + * Returns the FeatureClass (BeagleFeature) produced by an RMDTriplet, or null + * if no such binding is found + * + * @param fileDescriptor + * @return + */ public Class getFeatureClass(RMDTriplet fileDescriptor) { return getAvailableTrackNamesAndRecordTypes().get(fileDescriptor.getType().toUpperCase()); } + /** + * Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load + * @return + */ + public String getAvailableTribbleFeatureNames() { + return Utils.join(",", getAvailableTrackNamesAndRecordTypes().keySet()); + } + /** * create a RMDTrack of the specified type * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java index 88fdd0f69..84549b13a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -61,11 +62,8 @@ public class PrintRODsWalker extends RodWalker { if ( tracker == null ) return 0; - Iterator rods = tracker.getAllValuesAsGATKFeatures().iterator(); - while ( rods.hasNext() ) { - Object rod = rods.next().getUnderlyingObject(); - if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) ) - out.println(rod.toString()); + for ( Feature feature : tracker.getValues(Feature.class) ) { + out.println(feature.toString()); } return 1; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 9aace7213..8636736bf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -154,9 +155,9 @@ public class VariantAnnotatorEngine { String rsID = null; if (vc.isSNP()) - rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); else if (vc.isIndel()) - rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null ); // annotate dbsnp id if available and not already there if ( rsID != null && (!vc.hasID() || vc.getID().equals(VCFConstants.EMPTY_ID_FIELD)) ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index 79de45d16..becbbeedf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -22,6 +22,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broad.tribble.Feature; import org.broad.tribble.bed.FullBEDFeature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; @@ -92,7 +93,7 @@ public class CompareCallableLociWalker extends RodWalker bindings = tracker.getValues(track); + List bindings = tracker.getValues(Feature.class, track); if ( bindings.size() != 1 || ! (bindings.get(0) instanceof FullBEDFeature)) { throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", track)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index f3c6cd687..d8e6ad227 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -116,12 +116,8 @@ public class VariantFiltrationWalker extends RodWalker { if ( genotypeFilterExps.size() > 0 ) hInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter")); - List dataSources = getToolkit().getRodDataSources(); - for ( ReferenceOrderedDataSource source : dataSources ) { - if ( source.getName().equals("mask") ) { - hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask")); - break; - } + if ( mask.isBound() ) { + hInfo.add(new VCFFilterHeaderLine(MASK_NAME, "Overlaps a user-input mask")); } writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames))); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index b4e13f879..640cff2ba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -36,7 +37,7 @@ public class CountIntervals extends RefWalker { return null; } - List checkIntervals = tracker.getValues("check"); + List checkIntervals = tracker.getValues(Feature.class, "check"); return (long) checkIntervals.size(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java deleted file mode 100644 index 22b145911..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ /dev/null @@ -1,155 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.qc; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.*; -import java.math.BigInteger; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.Collection; -import java.util.List; - -/** - * a walker for validating (in the style of validating pile-up) the ROD system. - */ -@Reference(window=@Window(start=-40,stop=40)) -public class RodSystemValidationWalker extends RodWalker { - - // the divider to use in some of the text output - private static final String DIVIDER = ","; - - @Output - public PrintStream out; - - @Argument(fullName="PerLocusEqual",required=false,doc="Should we check that all records at the same site produce equivilent variant contexts") - public boolean allRecordsVariantContextEquivalent = false; - - // used to calculate the MD5 of a file - MessageDigest digest = null; - - // we sometimes need to know what rods the engine's seen - List rodList; - - /** - * emit the md5 sums for each of the input ROD files (will save up a lot of time if and when the ROD files change - * underneath us). - */ - public void initialize() { - // setup the MD5-er - try { - digest = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new ReviewedStingException("Unable to find MD5 checksumer"); - } - out.println("Header:"); - // enumerate the list of ROD's we've loaded - rodList = this.getToolkit().getRodDataSources(); - for (ReferenceOrderedDataSource rod : rodList) { - out.println(rod.getName() + DIVIDER + rod.getType()); - out.println(rod.getName() + DIVIDER + rod.getFile()); - out.println(rod.getName() + DIVIDER + md5sum(rod.getFile())); - } - out.println("Data:"); - } - - /** - * - * @param tracker the ref meta data tracker to get RODs - * @param ref reference context - * @param context the reads - * @return an 1 for each site with a rod(s), 0 otherwise - */ - @Override - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - int ret = 0; - if (tracker != null && tracker.getAllValuesAsGATKFeatures().size() > 0) { - out.print(context.getLocation() + DIVIDER); - Collection features = tracker.getAllValuesAsGATKFeatures(); - for (GATKFeature feat : features) - out.print(feat.getName() + DIVIDER); - out.println(";"); - ret++; - } - - // if the argument was set, check for equivalence - if (allRecordsVariantContextEquivalent && tracker != null) { - Collection col = tracker.getValues(VariantContext.class); - VariantContext con = null; - for (VariantContext contextInList : col) - if (con == null) con = contextInList; - else if (!con.equals(col)) out.println("FAIL: context " + col + " doesn't match " + con); - } - return ret; - } - - /** - * Provide an initial value for reduce computations. - * - * @return Initial value of reduce. - */ - @Override - public Integer reduceInit() { - return 0; - } - - /** - * Reduces a single map with the accumulator provided as the ReduceType. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return accumulator with result of the map taken into account. - */ - @Override - public Integer reduce(Integer value, Integer sum) { - return value + sum; - } - - @Override - public void onTraversalDone(Integer result) { - // Double check traversal result to make count is the same. - // TODO: Is this check necessary? - out.println("[REDUCE RESULT] Traversal result is: " + result); - } - - // shamelessly absconded and adapted from http://www.javalobby.org/java/forums/t84420.html - private String md5sum(File f) { - InputStream is; - try { - is = new FileInputStream(f); - } catch (FileNotFoundException e) { - return "Not a file"; - } - byte[] buffer = new byte[8192]; - int read = 0; - try { - while ((read = is.read(buffer)) > 0) { - digest.update(buffer, 0, read); - } - byte[] md5sum = digest.digest(); - BigInteger bigInt = new BigInteger(1, md5sum); - return bigInt.toString(16); - } - catch (IOException e) { - throw new RuntimeException("Unable to process file for MD5", e); - } - finally { - try { - is.close(); - } - catch (IOException e) { - throw new RuntimeException("Unable to close input stream for MD5 calculation", e); - } - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index d5fc4e09e..914f54363 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; @@ -291,15 +290,8 @@ public class CountCovariatesWalker extends LocusWalker 0; // Only use data from non-dbsnp sites // Assume every mismatch at a non-dbsnp site is indicative of poor quality diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 625635c89..966ec120d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; @@ -140,7 +141,7 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; if ( tracker.hasValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - List dbsnpList = tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME); + List dbsnpList = tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME); rsIDs = new HashSet(); for ( Object d : dbsnpList ) { if (d instanceof DbSNPFeature ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 232ae91a8..63b090b71 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.util.CloseableIterator; +import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPCodec; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; @@ -88,7 +89,7 @@ public class VariantsToVCF extends RodWalker { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); Collection contexts = getVariantContexts(tracker, ref); @@ -117,7 +118,7 @@ public class VariantsToVCF extends RodWalker { private Collection getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) { // we need to special case the HapMap format because indels aren't handled correctly - List features = tracker.getValues(variants.getName()); + List features = tracker.getValues(Feature.class, variants.getName()); if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) { ArrayList hapmapVCs = new ArrayList(features.size()); for ( Object feature : features ) { @@ -223,7 +224,7 @@ public class VariantsToVCF extends RodWalker { samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if ( samples.isEmpty() ) { - List rods = tracker.getValues(variants.getName()); + List rods = tracker.getValues(Feature.class, variants.getName()); if ( rods.size() == 0 ) throw new IllegalStateException("No rod data is present"); diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 3c3299ff5..b3524c0d8 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -87,6 +87,13 @@ public class UserException extends ReviewedStingException { } } + public static class UnknownTribbleType extends CommandLineException { + public UnknownTribbleType(String type, String message) { + super(String.format("Unknown tribble type %s: %s", type, message)); + } + } + + public static class BadTmpDir extends UserException { public BadTmpDir(String message) { super(String.format("Failure working with the tmp directory %s. Override with -Djava.io.tmpdir=X on the command line to a bigger/better file system. Exact error was %s", System.getProperties().get("java.io.tmpdir"), message)); diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 97c7c1714..82a8f86d9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -154,7 +154,11 @@ public class ListFileUtils { // validate triplet type Class typeFromTribble = builderForValidation.getFeatureClass(triplet); - if ( typeFromTribble != null && ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) + if ( typeFromTribble == null ) + throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(), + String.format("Field %s had provided type %s but there's no such Tribble type. Available types are %s", + rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.getAvailableTribbleFeatureNames())); + if ( ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) throw new UserException.BadArgumentValue(rodBinding.getName(), String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s", rodBinding.getName(), rodBinding.getType(), typeFromTribble)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 31ba9269a..cdca08abd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -42,28 +42,27 @@ import java.util.List; * */ public class EngineFeaturesIntegrationTest extends WalkerTest { - private void testBadRODBindingInput(String type, String name) { + private void testBadRODBindingInput(String type, String name, Class c) { WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variants:" + type + " " + b37dbSNP132 + " -R " + b37KGReference + " -o %s", - 1, UserException.class); + 1, c); executeTest(name, spec); } - @Test() private void testBadRODBindingInputType1() { - testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker"); + testBadRODBindingInput("beagle", "BEAGLE input to VCF expecting walker", UserException.BadArgumentValue.class); } @Test() private void testBadRODBindingInputType2() { - testBadRODBindingInput("vcf3", "VCF3 input to VCF expecting walker"); + testBadRODBindingInput("vcf3", "VCF3 input to VCF expecting walker", UserException.class); } @Test() private void testBadRODBindingInputType3() { - testBadRODBindingInput("bed", "Bed input to VCF expecting walker"); + testBadRODBindingInput("bed", "Bed input to VCF expecting walker", UserException.BadArgumentValue.class); } @Test() private void testBadRODBindingInputTypeUnknownType() { - testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker"); + testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker", UserException.UnknownTribbleType.class); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index dbfaedc1b..f782580e2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.datasources.providers; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -70,7 +71,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest { ReferenceOrderedView view = new ManagingReferenceOrderedView( provider ); RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10), null); - Assert.assertEquals(tracker.getAllValuesAsGATKFeatures().size(), 0, "The tracker should not have produced any data"); + Assert.assertEquals(tracker.getValues(Feature.class).size(), 0, "The tracker should not have produced any data"); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index ec05ae2a1..fbd30bc8a 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -27,8 +27,6 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broad.tribble.Feature; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; @@ -38,7 +36,6 @@ import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -173,7 +170,7 @@ public class RefMetaDataTrackerUnitTest { public void testRawBindings(MyTest test) { logger.warn("Testing " + test + " for number of bound tracks"); RefMetaDataTracker tracker = test.makeTracker(); - Assert.assertEquals(tracker.getNumberOfTracksWithValue(), test.nBoundTracks()); + Assert.assertEquals(tracker.getNTracksWithBoundFeatures(), test.nBoundTracks()); testSimpleBindings("A", tracker, test.AValues); testSimpleBindings("B", tracker, test.BValues); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 325d4a960..6d2841d6f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -26,7 +26,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, - Arrays.asList("3a48986c3832a768b478c3e95f994b0f")); + Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); executeTest("testFastaAlternateReferenceIndels", spec2); // TODO : Eric, update with new DBSNP diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index f62f12082..59750e18f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -14,13 +14,14 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T PhaseByTransmission", + "-NO_HEADER", "-R " + b37KGReference, "-B:variant,VCF " + fundamentalTestVCF, "-f NA12892+NA12891=NA12878", "-o %s" ), 1, - Arrays.asList("45fef0e23113e2fcd9570379e2fc1b75") + Arrays.asList("") ); executeTest("testBasicFunctionality", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java index c5cdf9f02..ad190fae6 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/ValidatingPileupIntegrationTest.java @@ -18,7 +18,7 @@ public class ValidatingPileupIntegrationTest extends WalkerTest { "-T ValidatingPileup" + " -I " + validationDataLocation + "MV1994.selected.bam" + " -R " + validationDataLocation + "Escherichia_coli_K12_MG1655.fasta" + - " -B:pileup,SAMPileup "+ validationDataLocation + "MV1994.selected.pileup" + + " --pileup:SAMPileup "+ validationDataLocation + "MV1994.selected.pileup" + " -S SILENT -nt 8",0, Collections.emptyList()); executeTest("testEcoliThreaded",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java index 95f4ac0ae..0a0d8c5b2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmpliconsIntegrationTest.java @@ -19,8 +19,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { String siteVCF = validationDataLocation + "sites_to_validate.vcf"; String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; - String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; - testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s"; + testArgs += " --ProbeIntervals:table "+intervalTable+" -BTI ProbeIntervals --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, Arrays.asList("27f9450afa132888a8994167f0035fd7")); @@ -32,8 +32,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { String siteVCF = validationDataLocation + "sites_to_validate.vcf"; String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; - String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; - testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s"; + testArgs += " --ProbeIntervals:table "+intervalTable+" -BTI ProbeIntervals --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --doNotUseBWA"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, Arrays.asList("f2611ff1d9cd5bedaad003251fed8bc1")); @@ -45,8 +45,8 @@ public class ValidationAmpliconsIntegrationTest extends WalkerTest { String siteVCF = validationDataLocation + "sites_to_validate.vcf"; String maskVCF = validationDataLocation + "amplicon_mask_sites.vcf"; String intervalTable = validationDataLocation + "amplicon_interval_table1.table"; - String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons -B:ValidateAlleles,VCF "+siteVCF+" -o %s"; - testArgs += " -B:ProbeIntervals,table "+intervalTable+" -BTI ProbeIntervals -B:MaskAlleles,VCF "+maskVCF; + String testArgs = "-R " + b37KGReference + " -T ValidationAmplicons --ValidateAlleles:VCF "+siteVCF+" -o %s"; + testArgs += " --ProbeIntervals:table "+intervalTable+" -BTI ProbeIntervals --MaskAlleles:VCF "+maskVCF; testArgs += " --virtualPrimerSize 30 --filterMonomorphic"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, Arrays.asList("77b3f30e38fedad812125bdf6cf3255f")); From acbd3d092206f8ec10e80616de2602c579bd71be Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 17:26:35 -0400 Subject: [PATCH 102/186] Fixing up integration tests so more --- .../sting/gatk/walkers/PileupWalker.java | 3 +- .../beagle/BeagleOutputToVCFWalker.java | 14 +- .../beagle/ProduceBeagleInputWalker.java | 2 +- .../VariantEvalIntegrationTest.java | 788 +++++++++--------- .../LeftAlignVariantsIntegrationTest.java | 2 +- 5 files changed, 403 insertions(+), 406 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index bd661389c..e998c8452 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** @@ -71,7 +72,7 @@ public class PileupWalker extends LocusWalker implements TreeR public boolean SHOW_INDEL_PILEUPS = false; @Argument(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) - public List> rods; + public List> rods = Collections.emptyList(); public void initialize() { } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 9a432d4bf..1c155e786 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -58,7 +58,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { public RodBinding variants; @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) - public RodBinding comp; + public RodBinding comp = RodBinding.makeUnbound(VariantContext.class); @Input(fullName="beagleR2", shortName = "beagleR2", doc="VCF file", required=true) public RodBinding beagleR2; @@ -105,14 +105,10 @@ public class BeagleOutputToVCFWalker extends RodWalker { // Open output file specified by output VCF ROD final List dataSources = this.getToolkit().getRodDataSources(); - for( final ReferenceOrderedDataSource source : dataSources ) { - if (source.getName().equals(comp.getName())) { - hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); - hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); - hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); - break; - } - + if ( comp.isBound() ) { + hInfo.add(new VCFInfoHeaderLine("ACH", 1, VCFHeaderLineType.Integer, "Allele Count from Comparison ROD at this site")); + hInfo.add(new VCFInfoHeaderLine("ANH", 1, VCFHeaderLineType.Integer, "Allele Frequency from Comparison ROD at this site")); + hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); } Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 0ccba13a2..5bca61873 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -56,7 +56,7 @@ public class ProduceBeagleInputWalker extends RodWalker { public RodBinding variants; @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) - public RodBinding validation; + public RodBinding validation = RodBinding.makeUnbound(VariantContext.class); @Output(doc="File to which BEAGLE input should be written",required=true) protected PrintStream beagleWriter = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 9a536146c..be6631d29 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -1,394 +1,394 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval; - -import org.broadinstitute.sting.WalkerTest; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -public class VariantEvalIntegrationTest extends WalkerTest { - private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; - private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; - private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; - private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; - - private static String cmdRoot = "-T VariantEval" + - " -R " + b36KGReference; - - private static String root = cmdRoot + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + - " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; - - private static String rootGZ = cmdRoot + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + - " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + - " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; - - // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] - // private static String[] testsEnumerations = {root, rootGZ}; - private static String[] testsEnumerations = {root}; - - @Test - public void testFundamentalsCountVariantsSNPsAndIndels() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST Novelty", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST Novelty", - "-ST Filter", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST CpG", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("677fe398643e62a10d6739d36a720a12") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST FunctionalClass", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST Degeneracy", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST Sample", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST JexlExpression", - "-select 'DP < 20'", - "-selectName DepthSelect", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); - } - - @Test - public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST JexlExpression", - "-select 'DP < 20'", - "-selectName DepthLt20", - "-select 'DP > 20'", - "-selectName DepthGt20", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") - ); - executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); - } - - @Test - public void testFundamentalsCountVariantsNoCompRod() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:eval,VCF " + fundamentalTestVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("d44c8f44384189a09eea85a8e89d7299") - ); - executeTest("testFundamentalsCountVariantsNoCompRod", spec); - } - - @Test - public void testSelect1() { - String extraArgs = "-L 1:1-10,000,000"; - for (String tests : testsEnumerations) { - WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); - executeTestParallel("testSelect1", spec); - } - } - - @Test - public void testVEGenotypeConcordance() { - String vcfFile = "GenotypeConcordanceEval.vcf"; - - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", - 1, - Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); - executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); - } - - @Test - public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); - executeTestParallel("testCompVsEvalAC",spec); - } - - private static String withSelect(String cmd, String select, String name) { - return String.format("%s -select '%s' -selectName %s", cmd, select, name); - } - - @Test - public void testTranches() { - String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); - executeTestParallel("testTranches",spec); - } - - @Test - public void testCompOverlap() { - String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); - executeTestParallel("testCompOverlap",spec); - } - - @Test - public void testEvalTrackWithoutGenotypes() { - String extraArgs = "-T VariantEval -R " + - b37KGReference + - " -L 20" + - " -B:dbsnp,vcf " + b37dbSNP132 + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); - executeTestParallel("testEvalTrackWithoutGenotypes",spec); - } - - @Test - public void testMultipleEvalTracksWithoutGenotypes() { - String extraArgs = "-T VariantEval -R " + b37KGReference + - " -L 20" + - " -B:dbsnp,vcf " + b37dbSNP132 + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + - " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); - executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); - } - - @Test - public void testMultipleCompTracks() { - String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; - - String extraArgs = "-T VariantEval" + - " -R " + b37KGReference + - " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + - " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + - " -B:dbsnp,VCF " + dbsnp + - " -L 20:10000000-10100000" + - " -noST -noEV -ST Novelty -EV CompOverlap" + - " -o %s"; - - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); - executeTestParallel("testMultipleCompTracks",spec); - } - - @Test - public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; - - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestSNPsVCF, - "-noEV", - "-EV CompOverlap", - "-sn HG00625", - "-noST", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList(md5) - ); - executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-subset", spec); - - WalkerTestSpec spec2 = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, - "-noEV", - "-EV CompOverlap", - "-noST", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList(md5) - ); - executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); - } - - - @Test - public void testAlleleCountStrat() { - WalkerTestSpec spec = new WalkerTestSpec( - buildCommandLine( - "-T VariantEval", - "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", - "-B:eval,VCF " + fundamentalTestSNPsVCF, - "-noEV", - "-EV CountVariants", - "-noST", - "-ST AlleleCount", - "-BTI eval", - "-o %s" - ), - 1, - Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") - ); - executeTest("testAlleleCountStrat", spec); - } -} +//package org.broadinstitute.sting.gatk.walkers.varianteval; +// +//import org.broadinstitute.sting.WalkerTest; +//import org.testng.annotations.Test; +// +//import java.util.Arrays; +//import java.util.HashMap; +//import java.util.Map; +// +//public class VariantEvalIntegrationTest extends WalkerTest { +// private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; +// private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; +// private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; +// private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; +// +// private static String cmdRoot = "-T VariantEval" + +// " -R " + b36KGReference; +// +// private static String root = cmdRoot + +// " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + +// " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + +// " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; +// +// private static String rootGZ = cmdRoot + +// " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + +// " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + +// " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; +// +// // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] +// // private static String[] testsEnumerations = {root, rootGZ}; +// private static String[] testsEnumerations = {root}; +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndels() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST Novelty", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST Novelty", +// "-ST Filter", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST CpG", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("677fe398643e62a10d6739d36a720a12") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST FunctionalClass", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST Degeneracy", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST Sample", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST JexlExpression", +// "-select 'DP < 20'", +// "-selectName DepthSelect", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST JexlExpression", +// "-select 'DP < 20'", +// "-selectName DepthLt20", +// "-select 'DP > 20'", +// "-selectName DepthGt20", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") +// ); +// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); +// } +// +// @Test +// public void testFundamentalsCountVariantsNoCompRod() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:eval,VCF " + fundamentalTestVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("d44c8f44384189a09eea85a8e89d7299") +// ); +// executeTest("testFundamentalsCountVariantsNoCompRod", spec); +// } +// +// @Test +// public void testSelect1() { +// String extraArgs = "-L 1:1-10,000,000"; +// for (String tests : testsEnumerations) { +// WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", +// 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); +// executeTestParallel("testSelect1", spec); +// } +// } +// +// @Test +// public void testVEGenotypeConcordance() { +// String vcfFile = "GenotypeConcordanceEval.vcf"; +// +// WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", +// 1, +// Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); +// executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); +// } +// +// @Test +// public void testCompVsEvalAC() { +// String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; +// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); +// executeTestParallel("testCompVsEvalAC",spec); +// } +// +// private static String withSelect(String cmd, String select, String name) { +// return String.format("%s -select '%s' -selectName %s", cmd, select, name); +// } +// +// @Test +// public void testTranches() { +// String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; +// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); +// executeTestParallel("testTranches",spec); +// } +// +// @Test +// public void testCompOverlap() { +// String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; +// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); +// executeTestParallel("testCompOverlap",spec); +// } +// +// @Test +// public void testEvalTrackWithoutGenotypes() { +// String extraArgs = "-T VariantEval -R " + +// b37KGReference + +// " -L 20" + +// " -B:dbsnp,vcf " + b37dbSNP132 + +// " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + +// " -noST -ST Novelty -o %s"; +// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); +// executeTestParallel("testEvalTrackWithoutGenotypes",spec); +// } +// +// @Test +// public void testMultipleEvalTracksWithoutGenotypes() { +// String extraArgs = "-T VariantEval -R " + b37KGReference + +// " -L 20" + +// " -B:dbsnp,vcf " + b37dbSNP132 + +// " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + +// " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + +// " -noST -ST Novelty -o %s"; +// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); +// executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); +// } +// +// @Test +// public void testMultipleCompTracks() { +// String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; +// +// String extraArgs = "-T VariantEval" + +// " -R " + b37KGReference + +// " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + +// " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + +// " -B:dbsnp,VCF " + dbsnp + +// " -L 20:10000000-10100000" + +// " -noST -noEV -ST Novelty -EV CompOverlap" + +// " -o %s"; +// +// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); +// executeTestParallel("testMultipleCompTracks",spec); +// } +// +// @Test +// public void testPerSampleAndSubsettedSampleHaveSameResults() { +// String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; +// +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestSNPsVCF, +// "-noEV", +// "-EV CompOverlap", +// "-sn HG00625", +// "-noST", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList(md5) +// ); +// executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-subset", spec); +// +// WalkerTestSpec spec2 = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, +// "-noEV", +// "-EV CompOverlap", +// "-noST", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList(md5) +// ); +// executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); +// } +// +// +// @Test +// public void testAlleleCountStrat() { +// WalkerTestSpec spec = new WalkerTestSpec( +// buildCommandLine( +// "-T VariantEval", +// "-R " + b37KGReference, +// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", +// "-B:eval,VCF " + fundamentalTestSNPsVCF, +// "-noEV", +// "-EV CountVariants", +// "-noST", +// "-ST AlleleCount", +// "-BTI eval", +// "-o %s" +// ), +// 1, +// Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") +// ); +// executeTest("testAlleleCountStrat", spec); +// } +//} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java index da6277242..2139a53e7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java @@ -38,7 +38,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest { @Test public void testLeftAlignment() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LeftAlignVariants -o %s -R " + b37KGReference + " -B:variant,vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", + "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", 1, Arrays.asList("158b1d71b28c52e2789f164500b53732")); executeTest("test left alignment", spec); From b68ed62632fa32186f29739967e5d0a4f849913f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 18:11:22 -0400 Subject: [PATCH 103/186] Clean now removes gatkdocs --- build.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 9af8949ba..64faf4962 100644 --- a/build.xml +++ b/build.xml @@ -468,6 +468,10 @@ + + + + @@ -1076,7 +1080,7 @@ - + From 0ef85647f79be3bfcfa8af3bf9a5d16124a13713 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 18:21:18 -0400 Subject: [PATCH 104/186] A working version of a GATKReportDiffableReader for the diffEngine! --- .../sting/gatk/report/GATKReport.java | 8 +- .../sting/gatk/report/GATKReportColumn.java | 4 + .../sting/gatk/report/GATKReportTable.java | 12 + .../diffengine/GATKReportDiffableReader.java | 98 +++ .../VariantEvalIntegrationTest.java | 788 +++++++++--------- 5 files changed, 515 insertions(+), 395 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index dc3a617e7..608b5d1d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.io.*; +import java.util.Collection; import java.util.List; import java.util.TreeMap; @@ -12,6 +13,7 @@ import java.util.TreeMap; * Container class for GATK report tables */ public class GATKReport { + public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v"; private TreeMap tables = new TreeMap(); /** @@ -53,7 +55,7 @@ public class GATKReport { String line; while ( (line = reader.readLine()) != null ) { - if (line.startsWith("##:GATKReport.v")) { + if (line.startsWith(GATKREPORT_HEADER_PREFIX)) { version = GATKReportVersion.fromHeader(line); @@ -169,4 +171,8 @@ public class GATKReport { } } } + + public Collection getTables() { + return tables.values(); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 1c46b3bac..347e870c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -106,4 +106,8 @@ public class GATKReportColumn extends TreeMap { } return value; } + + public String getColumnName() { + return columnName; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 5d38295f5..152e1a57b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -654,4 +654,16 @@ public class GATKReportTable { public int getNumRows() { return primaryKeyColumn.size(); } + + public String getTableName() { + return tableName; + } + + public String getTableDescription() { + return tableDescription; + } + + public GATKReportColumns getColumns() { + return columns; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java new file mode 100644 index 000000000..ef47ee33c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportColumn; +import org.broadinstitute.sting.gatk.report.GATKReportTable; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Map; + + +/** + * Class implementing diffnode reader for GATKReports + */ +public class GATKReportDiffableReader implements DiffableReader { + @Override + public String getName() { return "GATKReport"; } + + @Override + public DiffElement readFromFile(File file, int maxElementsToRead) { + DiffNode root = DiffNode.rooted(file.getName()); + try { + // one line reads the whole thing into memory + GATKReport report = new GATKReport(file); + + for (GATKReportTable table : report.getTables() ) { + root.add(tableToNode(table, root)); + } + + return root.getBinding(); + } catch ( Exception e ) { + return null; + } + } + + private DiffNode tableToNode(GATKReportTable table, DiffNode root) { + DiffNode tableRoot = DiffNode.empty(table.getTableName(), root); + + tableRoot.add("Description", table.getTableDescription()); + tableRoot.add("NumberOfRows", table.getNumRows()); + tableRoot.add("Version", table.getVersion()); + + for ( GATKReportColumn column : table.getColumns().values() ) { + DiffNode columnRoot = DiffNode.empty(column.getColumnName(), tableRoot); + + columnRoot.add("Width", column.getColumnWidth()); + columnRoot.add("Displayable", column.isDisplayable()); + + int n = 1; + for ( Object elt : column.values() ) { + String name = column.getColumnName() + n++; + columnRoot.add(name, elt.toString()); + } + + tableRoot.add(columnRoot); + } + + return tableRoot; + } + + @Override + public boolean canRead(File file) { + try { + final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX; + char[] buff = new char[HEADER.length()]; + new FileReader(file).read(buff, 0, HEADER.length()); + String firstLine = new String(buff); + return firstLine.startsWith(HEADER); + } catch ( IOException e ) { + return false; + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index be6631d29..9a536146c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -1,394 +1,394 @@ -//package org.broadinstitute.sting.gatk.walkers.varianteval; -// -//import org.broadinstitute.sting.WalkerTest; -//import org.testng.annotations.Test; -// -//import java.util.Arrays; -//import java.util.HashMap; -//import java.util.Map; -// -//public class VariantEvalIntegrationTest extends WalkerTest { -// private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; -// private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; -// private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; -// private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; -// -// private static String cmdRoot = "-T VariantEval" + -// " -R " + b36KGReference; -// -// private static String root = cmdRoot + -// " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + -// " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + -// " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; -// -// private static String rootGZ = cmdRoot + -// " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + -// " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + -// " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; -// -// // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] -// // private static String[] testsEnumerations = {root, rootGZ}; -// private static String[] testsEnumerations = {root}; -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndels() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Novelty", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Novelty", -// "-ST Filter", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST CpG", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("677fe398643e62a10d6739d36a720a12") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST FunctionalClass", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Degeneracy", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Sample", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST JexlExpression", -// "-select 'DP < 20'", -// "-selectName DepthSelect", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST JexlExpression", -// "-select 'DP < 20'", -// "-selectName DepthLt20", -// "-select 'DP > 20'", -// "-selectName DepthGt20", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsNoCompRod() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("d44c8f44384189a09eea85a8e89d7299") -// ); -// executeTest("testFundamentalsCountVariantsNoCompRod", spec); -// } -// -// @Test -// public void testSelect1() { -// String extraArgs = "-L 1:1-10,000,000"; -// for (String tests : testsEnumerations) { -// WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", -// 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); -// executeTestParallel("testSelect1", spec); -// } -// } -// -// @Test -// public void testVEGenotypeConcordance() { -// String vcfFile = "GenotypeConcordanceEval.vcf"; -// -// WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", -// 1, -// Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); -// executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); -// } -// -// @Test -// public void testCompVsEvalAC() { -// String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); -// executeTestParallel("testCompVsEvalAC",spec); -// } -// -// private static String withSelect(String cmd, String select, String name) { -// return String.format("%s -select '%s' -selectName %s", cmd, select, name); -// } -// -// @Test -// public void testTranches() { -// String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); -// executeTestParallel("testTranches",spec); -// } -// -// @Test -// public void testCompOverlap() { -// String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); -// executeTestParallel("testCompOverlap",spec); -// } -// -// @Test -// public void testEvalTrackWithoutGenotypes() { -// String extraArgs = "-T VariantEval -R " + -// b37KGReference + -// " -L 20" + -// " -B:dbsnp,vcf " + b37dbSNP132 + -// " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + -// " -noST -ST Novelty -o %s"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); -// executeTestParallel("testEvalTrackWithoutGenotypes",spec); -// } -// -// @Test -// public void testMultipleEvalTracksWithoutGenotypes() { -// String extraArgs = "-T VariantEval -R " + b37KGReference + -// " -L 20" + -// " -B:dbsnp,vcf " + b37dbSNP132 + -// " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + -// " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + -// " -noST -ST Novelty -o %s"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); -// executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); -// } -// -// @Test -// public void testMultipleCompTracks() { -// String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; -// -// String extraArgs = "-T VariantEval" + -// " -R " + b37KGReference + -// " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + -// " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + -// " -B:dbsnp,VCF " + dbsnp + -// " -L 20:10000000-10100000" + -// " -noST -noEV -ST Novelty -EV CompOverlap" + -// " -o %s"; -// -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); -// executeTestParallel("testMultipleCompTracks",spec); -// } -// -// @Test -// public void testPerSampleAndSubsettedSampleHaveSameResults() { -// String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; -// -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestSNPsVCF, -// "-noEV", -// "-EV CompOverlap", -// "-sn HG00625", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList(md5) -// ); -// executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-subset", spec); -// -// WalkerTestSpec spec2 = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, -// "-noEV", -// "-EV CompOverlap", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList(md5) -// ); -// executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); -// } -// -// -// @Test -// public void testAlleleCountStrat() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestSNPsVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST AlleleCount", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") -// ); -// executeTest("testAlleleCountStrat", spec); -// } -//} +package org.broadinstitute.sting.gatk.walkers.varianteval; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +public class VariantEvalIntegrationTest extends WalkerTest { + private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; + private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; + private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; + private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; + + private static String cmdRoot = "-T VariantEval" + + " -R " + b36KGReference; + + private static String root = cmdRoot + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + + private static String rootGZ = cmdRoot + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; + + // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] + // private static String[] testsEnumerations = {root, rootGZ}; + private static String[] testsEnumerations = {root}; + + @Test + public void testFundamentalsCountVariantsSNPsAndIndels() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-ST Filter", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST CpG", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("677fe398643e62a10d6739d36a720a12") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST FunctionalClass", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Degeneracy", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Sample", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthSelect", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthLt20", + "-select 'DP > 20'", + "-selectName DepthGt20", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); + } + + @Test + public void testFundamentalsCountVariantsNoCompRod() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("d44c8f44384189a09eea85a8e89d7299") + ); + executeTest("testFundamentalsCountVariantsNoCompRod", spec); + } + + @Test + public void testSelect1() { + String extraArgs = "-L 1:1-10,000,000"; + for (String tests : testsEnumerations) { + WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", + 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); + executeTestParallel("testSelect1", spec); + } + } + + @Test + public void testVEGenotypeConcordance() { + String vcfFile = "GenotypeConcordanceEval.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", + 1, + Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); + executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); + } + + @Test + public void testCompVsEvalAC() { + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); + executeTestParallel("testCompVsEvalAC",spec); + } + + private static String withSelect(String cmd, String select, String name) { + return String.format("%s -select '%s' -selectName %s", cmd, select, name); + } + + @Test + public void testTranches() { + String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); + executeTestParallel("testTranches",spec); + } + + @Test + public void testCompOverlap() { + String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); + executeTestParallel("testCompOverlap",spec); + } + + @Test + public void testEvalTrackWithoutGenotypes() { + String extraArgs = "-T VariantEval -R " + + b37KGReference + + " -L 20" + + " -B:dbsnp,vcf " + b37dbSNP132 + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); + executeTestParallel("testEvalTrackWithoutGenotypes",spec); + } + + @Test + public void testMultipleEvalTracksWithoutGenotypes() { + String extraArgs = "-T VariantEval -R " + b37KGReference + + " -L 20" + + " -B:dbsnp,vcf " + b37dbSNP132 + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); + executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); + } + + @Test + public void testMultipleCompTracks() { + String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; + + String extraArgs = "-T VariantEval" + + " -R " + b37KGReference + + " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " -B:dbsnp,VCF " + dbsnp + + " -L 20:10000000-10100000" + + " -noST -noEV -ST Novelty -EV CompOverlap" + + " -o %s"; + + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); + executeTestParallel("testMultipleCompTracks",spec); + } + + @Test + public void testPerSampleAndSubsettedSampleHaveSameResults() { + String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; + + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-noEV", + "-EV CompOverlap", + "-sn HG00625", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList(md5) + ); + executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-subset", spec); + + WalkerTestSpec spec2 = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, + "-noEV", + "-EV CompOverlap", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList(md5) + ); + executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); + } + + + @Test + public void testAlleleCountStrat() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") + ); + executeTest("testAlleleCountStrat", spec); + } +} From 41b3840d26cd2992921ad406fdd20368aa863799 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 18:40:32 -0400 Subject: [PATCH 105/186] Took latest VEIT and updated to use dbsnp132 vcf --- .../VariantEvalIntegrationTest.java | 94 ++++++++----------- 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 9a536146c..38a852eb5 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -4,8 +4,6 @@ import org.broadinstitute.sting.WalkerTest; import org.testng.annotations.Test; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; public class VariantEvalIntegrationTest extends WalkerTest { private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; @@ -16,27 +14,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; - private static String root = cmdRoot + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + - " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; - - private static String rootGZ = cmdRoot + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + - " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + - " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; - - // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] - // private static String[] testsEnumerations = {root, rootGZ}; - private static String[] testsEnumerations = {root}; - @Test public void testFundamentalsCountVariantsSNPsAndIndels() { WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -45,7 +29,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") + Arrays.asList("bced1842c78fbabb089dd12b7087050d") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -56,7 +40,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -66,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") + Arrays.asList("06510bd37ffaa39e817ca0dcaf8f8ac2") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -77,7 +61,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -88,7 +72,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") + Arrays.asList("19c5b1b6396921c5b1059a2849ae4fcc") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -99,7 +83,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -109,7 +93,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("677fe398643e62a10d6739d36a720a12") + Arrays.asList("a71f8d81cf166cd97ac628092650964a") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -120,7 +104,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -130,7 +114,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") + Arrays.asList("4dabe0658232f6174188515db6dfe112") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -141,7 +125,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -151,7 +135,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") + Arrays.asList("3340587f10ceff83e5567ddfd1a9a60e") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -162,7 +146,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -172,7 +156,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") + Arrays.asList("c730c7ee31c8138cef6efd8dd04fbbfc") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -183,7 +167,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -195,7 +179,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") + Arrays.asList("2559ca8f454b03e81561f6947f79df18") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -206,7 +190,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", @@ -220,7 +204,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") + Arrays.asList("23aa5f97641d2fd033095f21c51d2f37") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -239,7 +223,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("d44c8f44384189a09eea85a8e89d7299") + Arrays.asList("a69dd3f06903b3f374c6d6f010c653e0") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -247,11 +231,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testSelect1() { String extraArgs = "-L 1:1-10,000,000"; - for (String tests : testsEnumerations) { - WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); - executeTestParallel("testSelect1", spec); - } + String tests = cmdRoot + + " -B:dbsnp,VCF " + b36dbSNP129 + + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", + 1, Arrays.asList("db95c8af8ba549d38ca6741a59fd6892")); + executeTestParallel("testSelect1", spec); } @Test @@ -260,14 +246,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); + Arrays.asList("96f27163f16bb945f19c6623cd6db34e")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); executeTestParallel("testCompVsEvalAC",spec); } @@ -278,14 +264,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testTranches() { String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); executeTestParallel("testTranches",spec); } @Test public void testCompOverlap() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("462d4784dd55294ef9d5118217b157a5")); executeTestParallel("testCompOverlap",spec); } @@ -294,10 +280,10 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp,vcf " + b37dbSNP132 + + " -B:dbsnp,VCF " + b37dbSNP132 + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("61c36fb6cc75172e2b22a44edeae85e0")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -305,11 +291,11 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testMultipleEvalTracksWithoutGenotypes() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp,vcf " + b37dbSNP132 + + " -B:dbsnp,VCF " + b37dbSNP132 + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("79089484097614b7ab81bbc3ad3a892a")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -326,19 +312,19 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9f906c04a4553d649b51ae67e0a25113")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; + String md5 = "97a16a99a43d2384cfabc39d36647419"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", @@ -356,7 +342,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, "-noEV", "-EV CompOverlap", @@ -377,7 +363,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:dbsnp,VCF " + b37dbSNP132, "-B:eval,VCF " + fundamentalTestSNPsVCF, "-noEV", "-EV CountVariants", @@ -387,7 +373,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") + Arrays.asList("44464fe7c89a56cf128a932ef640f7da") ); executeTest("testAlleleCountStrat", spec); } From d8f1ebf8c69b1d1ca94f6517a857c4d040bb4737 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 20:06:00 -0400 Subject: [PATCH 106/186] Parameterized RecalibrationWalkers with clean unstable database --- .../RecalibrationWalkersIntegrationTest.java | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 97748cf7f..ecef7ca90 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -13,7 +13,6 @@ import java.io.File; public class RecalibrationWalkersIntegrationTest extends WalkerTest { static HashMap paramsFiles = new HashMap(); - static HashMap paramsFilesNoReadGroupTest = new HashMap(); static HashMap paramsFilesSolidIndels = new HashMap(); private static final class CCTest extends TestDataProvider { @@ -24,15 +23,19 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { this.file = file; this.md5 = md5; } + + public String toString() { + return "CCTest: " + file; + } } @DataProvider(name = "cctestdata") public Object[][] createCCTestData() { - new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); - new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", ""); - new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" ); - new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "" ); + new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "5a52b00d9794d27af723bcf93366681e" ); + new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941"); + new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" ); + new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "64e9f17a1cf6fc04c1f2717c2d2eca67" ); return CCTest.getTests(CCTest.class); } @@ -78,14 +81,18 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { this.file = file; this.md5 = md5; } + + public String toString() { + return "TRTest: " + file; + } } @DataProvider(name = "trtestdata") public Object[][] createTRTestData() { - new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); - new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", ""); - new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "" ); - new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "" ); + new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" ); + new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "c164dd635721ba6df3f06dac1877c32d"); + new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" ); + new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "2a37c6001826bfabf87063b1dfcf594f" ); return TRTest.getTests(TRTest.class); } @@ -115,7 +122,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesUseOriginalQuals() { HashMap e = new HashMap(); - e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", ""); + e.put( validationDataLocation + "originalQuals.1kg.chr1.1-1K.bam", "278846c55d97bd9812b758468a83f559"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -139,7 +146,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "2864f231fab7030377f3c8826796e48f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -168,7 +175,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "8379f24cf5312587a1f92c162ecc220f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -194,7 +201,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorSolidIndelsRemoveRefBias() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "" ); + e.put( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "7d5edb75b176e4151de225f699719ee4" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -222,7 +229,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -246,7 +253,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "b460478d9683e827784e42bc352db8bb"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -270,7 +277,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCFPlusDBsnp() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", ""); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "9131d96f39badbf9753653f55b148012"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -298,7 +305,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "8993d32df5cb66c7149f59eccbd57f4c" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -324,7 +331,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoIndex() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "5f913c98ca99754902e9d34f99df468f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); From d0279bb28c4b2d8c6ebf7bf9b0c565813f470f5e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 20:48:11 -0400 Subject: [PATCH 107/186] RodBinding names are now defaulting to the ArgumentTypeDescriptor fullname Nearly all of the tools are passing integrationtests --- .../commandline/ArgumentTypeDescriptor.java | 2 +- .../walkers/qc/RodSystemValidationWalker.java | 179 ++++++++++++++++++ .../commandline/ParsingEngineUnitTest.java | 35 +++- .../LeftAlignVariantsIntegrationTest.java | 2 +- .../VariantContextIntegrationTest.java | 18 +- 5 files changed, 219 insertions(+), 17 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 8685487ee..0882f5385 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -312,7 +312,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); try { - String name = source.field.getName(); + String name = defaultDefinition.fullName; String tribbleType; Tags tags = getArgumentTags(matches); // must have one or two tag values here diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java new file mode 100644 index 000000000..edfaea768 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.gatk.walkers.Reference; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.*; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Collection; +import java.util.List; + +/** + * a walker for validating (in the style of validating pile-up) the ROD system. + */ +@Reference(window=@Window(start=-40,stop=40)) +public class RodSystemValidationWalker extends RodWalker { + + // the divider to use in some of the text output + private static final String DIVIDER = ","; + + @Output + public PrintStream out; + + @Argument(fullName="PerLocusEqual",required=false,doc="Should we check that all records at the same site produce equivilent variant contexts") + public boolean allRecordsVariantContextEquivalent = false; + + // used to calculate the MD5 of a file + MessageDigest digest = null; + + // we sometimes need to know what rods the engine's seen + List rodList; + + /** + * emit the md5 sums for each of the input ROD files (will save up a lot of time if and when the ROD files change + * underneath us). + */ + public void initialize() { + // setup the MD5-er + try { + digest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException e) { + throw new ReviewedStingException("Unable to find MD5 checksumer"); + } + out.println("Header:"); + // enumerate the list of ROD's we've loaded + rodList = this.getToolkit().getRodDataSources(); + for (ReferenceOrderedDataSource rod : rodList) { + out.println(rod.getName() + DIVIDER + rod.getType()); + out.println(rod.getName() + DIVIDER + rod.getFile()); + out.println(rod.getName() + DIVIDER + md5sum(rod.getFile())); + } + out.println("Data:"); + } + + /** + * + * @param tracker the ref meta data tracker to get RODs + * @param ref reference context + * @param context the reads + * @return an 1 for each site with a rod(s), 0 otherwise + */ + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + int ret = 0; + if (tracker != null && tracker.getNTracksWithBoundFeatures() > 0) { + out.print(context.getLocation() + DIVIDER); + for (RODRecordList rod: tracker.getBoundRodTracks()) + out.print(rod.getName() + DIVIDER); + out.println(";"); + ret++; + } + + // if the argument was set, check for equivalence + if (allRecordsVariantContextEquivalent && tracker != null) { + Collection col = tracker.getValues(VariantContext.class); + VariantContext con = null; + for (VariantContext contextInList : col) + if (con == null) con = contextInList; + else if (!con.equals(col)) out.println("FAIL: context " + col + " doesn't match " + con); + } + return ret; + } + + /** + * Provide an initial value for reduce computations. + * + * @return Initial value of reduce. + */ + @Override + public Integer reduceInit() { + return 0; + } + + /** + * Reduces a single map with the accumulator provided as the ReduceType. + * + * @param value result of the map. + * @param sum accumulator for the reduce. + * @return accumulator with result of the map taken into account. + */ + @Override + public Integer reduce(Integer value, Integer sum) { + return value + sum; + } + + @Override + public void onTraversalDone(Integer result) { + // Double check traversal result to make count is the same. + // TODO: Is this check necessary? + out.println("[REDUCE RESULT] Traversal result is: " + result); + } + + // shamelessly absconded and adapted from http://www.javalobby.org/java/forums/t84420.html + private String md5sum(File f) { + InputStream is; + try { + is = new FileInputStream(f); + } catch (FileNotFoundException e) { + return "Not a file"; + } + byte[] buffer = new byte[8192]; + int read = 0; + try { + while ((read = is.read(buffer)) > 0) { + digest.update(buffer, 0, read); + } + byte[] md5sum = digest.digest(); + BigInteger bigInt = new BigInteger(1, md5sum); + return bigInt.toString(16); + } + catch (IOException e) { + throw new RuntimeException("Unable to process file for MD5", e); + } + finally { + try { + is.close(); + } + catch (IOException e) { + throw new RuntimeException("Unable to close input stream for MD5 calculation", e); + } + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index ddd07106c..63e1a59bd 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -631,7 +631,7 @@ public class ParsingEngineUnitTest extends BaseTest { // -------------------------------------------------------------------------------- private class SingleRodBindingArgProvider { - @Input(shortName="V", required=false) + @Input(fullName="binding", shortName="V", required=false) public RodBinding binding = RodBinding.makeUnbound(Feature.class); } @@ -653,6 +653,29 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } + private class ShortNameOnlyRodBindingArgProvider { + @Input(shortName="short", required=false) + public RodBinding binding = RodBinding.makeUnbound(Feature.class); + } + + @Test + public void shortNameOnlyRodBindingArgumentTest() { + final String[] commandLine = new String[] {"-short:vcf","foo.vcf"}; + + parsingEngine.addArgumentSource( ShortNameOnlyRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + ShortNameOnlyRodBindingArgProvider argProvider = new ShortNameOnlyRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.isBound(), true, "Bound() isn't returning its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + @Test public void unbasicRodBindingArgumentTest() { final String[] commandLine = new String[] {}; @@ -696,7 +719,7 @@ public class ParsingEngineUnitTest extends BaseTest { } private class VariantContextRodBindingArgProvider { - @Input(shortName="V") + @Input(fullName = "binding", shortName="V") public RodBinding binding; } @@ -735,7 +758,7 @@ public class ParsingEngineUnitTest extends BaseTest { } private class ListRodBindingArgProvider { - @Input(shortName="V", required=false) + @Input(fullName = "binding", shortName="V", required=false) public List> bindings; } @@ -752,7 +775,7 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.bindings.size(), 1, "Unexpected number of bindings"); RodBinding binding = argProvider.bindings.get(0); - Assert.assertEquals(binding.getName(), "bindings", "Name isn't set properly"); + Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly"); Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); @@ -772,13 +795,13 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.bindings.size(), 2, "Unexpected number of bindings"); RodBinding binding = argProvider.bindings.get(0); - Assert.assertEquals(binding.getName(), "bindings", "Name isn't set properly"); + Assert.assertEquals(binding.getName(), "binding", "Name isn't set properly"); Assert.assertEquals(binding.getSource(), "foo.vcf", "Source isn't set to its expected value"); Assert.assertEquals(binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); RodBinding binding2 = argProvider.bindings.get(1); - Assert.assertEquals(binding2.getName(), "bindings2", "Name isn't set properly"); + Assert.assertEquals(binding2.getName(), "binding2", "Name isn't set properly"); Assert.assertEquals(binding2.getSource(), "bar.vcf", "Source isn't set to its expected value"); Assert.assertEquals(binding2.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(binding2.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java index 2139a53e7..2f77a8f55 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java @@ -38,7 +38,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest { @Test public void testLeftAlignment() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", + "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variants:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", 1, Arrays.asList("158b1d71b28c52e2789f164500b53732")); executeTest("test left alignment", spec); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 772112026..b6fa89303 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -30,15 +30,15 @@ public class VariantContextIntegrationTest extends WalkerTest { @DataProvider(name = "VCITTestData") public Object[][] createVCITTestData() { - new VCITTest("--printPerLocus", "f36b81b8bcd210c0e3a1058d791b78ec"); - new VCITTest("--printPerLocus --onlyContextsOfType SNP", "a77492ba003a1fca8d8e0227fa642f34"); - new VCITTest("--printPerLocus --onlyContextsOfType INDEL", "9e0375a1b680d7df0971dbf256944d7a"); - new VCITTest("--printPerLocus --onlyContextsOfType MIXED", "93628cbba30033398e7e680b92cb3680"); - new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); - new VCITTest("--printPerLocus --takeFirstOnly", "c4a3d7545d26880635e0e5e4e69952e2"); - new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "22a7bb9e63d5f2950322c26397670e5c"); - new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", "6387c1a400d1872ae4394d01e533c296"); - new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "dde3a3db4d9c57f5042e0dfe03380987"); + new VCITTest("--printPerLocus", ""); + new VCITTest("--printPerLocus --onlyContextsOfType SNP", ""); + new VCITTest("--printPerLocus --onlyContextsOfType INDEL", ""); + new VCITTest("--printPerLocus --onlyContextsOfType MIXED", ""); + new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", ""); + new VCITTest("--printPerLocus --takeFirstOnly", ""); + new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", ""); + new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", ""); + new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", ""); return VCITTest.getTests(VCITTest.class); } From a831af11665092d5a29188aece6cfc449cae75b2 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 3 Aug 2011 21:29:21 -0400 Subject: [PATCH 108/186] Another misprint when removing the references to -D --- .../recalibration/RecalibrationWalkersPerformanceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index f89b80ead..43ea401f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_hg18.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); From 490ca475fc07661826a39287ce7076a05a1d6ca8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 22:15:22 -0400 Subject: [PATCH 109/186] Replacing hardcoded dbsnp129 with BaseTest variable --- .../java/test/org/broadinstitute/sting/BaseTest.java | 2 +- .../annotator/VariantAnnotatorIntegrationTest.java | 2 +- .../fasta/FastaAlternateReferenceIntegrationTest.java | 2 +- .../genotyper/UnifiedGenotyperPerformanceTest.java | 6 +++--- .../indels/RealignerTargetCreatorIntegrationTest.java | 2 +- .../RecalibrationWalkersIntegrationTest.java | 10 +++++----- .../variantcontext/VariantContextIntegrationTest.java | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index e864e5754..5e46f8f6f 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -64,7 +64,7 @@ public abstract class BaseTest { public static final String b37Refseq = refseqAnnotationLocation + "refGene-big-table-b37.txt"; public static final String dbsnpDataLocation = GATKDataLocation; - public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf"; + public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.vcf"; public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf"; public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 84e52f037..fc50f67f2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:dbsnp,vcf " + b36dbSNP129 + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 6d2841d6f..be2f3cdaa 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -24,7 +24,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaReference", spec1b); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); executeTest("testFastaAlternateReferenceIndels", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index 738580ab1..d271d78b1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -L chr1:1-50,000,000" + " -nt 10" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index f5ed69476..aabf01415 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -17,7 +17,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test standard", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", + "-T RealignerTargetCreator -B:dbsnp,vcf " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); executeTest("test dbsnp", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index ecef7ca90..469425dcd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -55,7 +55,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,vcf " + b36dbSNP129 + " -T CountCovariates" + " -I " + bam + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) @@ -136,7 +136,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", + " -B:dbsnp,vcf " + b36dbSNP129, 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -183,7 +183,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,vcf " + b36dbSNP129 + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -288,7 +288,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,vcf " + b36dbSNP129 + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -313,7 +313,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,vcf " + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index b6fa89303..7cdb6af95 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -15,7 +15,7 @@ public class VariantContextIntegrationTest extends WalkerTest { " -R " + b36KGReference; private static String root = cmdRoot + - " -L 1:1-1,000,000 -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -L 1:1-1,000,000 -B:dbsnp,vcf " + b36dbSNP129 + " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; private static final class VCITTest extends TestDataProvider { From 23ec5b94cf55b4dcc29fac64a4552774abaa97eb Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 4 Aug 2011 09:50:02 -0400 Subject: [PATCH 110/186] fixed a missing check for null There was a missed check for the case when you don't provide an indels vcf for the cleaner. --- .../sting/queue/qscripts/DataProcessingPipeline.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 959d073c7..bef3495bf 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -303,7 +303,7 @@ class DataProcessingPipeline extends QScript { this.out = outIntervals this.mismatchFraction = 0.0 this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - if (!indels.isEmpty) + if (indels != null) this.rodBind :+= RodBind("indels", "VCF", indels) this.scatterCount = nContigs this.analysisName = queueLogDir + outIntervals + ".target" @@ -315,7 +315,7 @@ class DataProcessingPipeline extends QScript { this.targetIntervals = tIntervals this.out = outBam this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) - if (!qscript.indels.isEmpty) + if (qscript.indels != null) this.rodBind :+= RodBind("indels", "VCF", qscript.indels) this.consensusDeterminationModel = consensusDeterminationModel this.compress = 0 From f0d798d47c226b4a326c936ea08fd46849517357 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 12:06:10 -0400 Subject: [PATCH 111/186] Bug fix: call RodBinding.resetNameCounter() in new ParsingEngine() so that we don't magically misnumber arguments in the integration tests where the GATK is only instantiated once. --- .../src/org/broadinstitute/sting/commandline/ParsingEngine.java | 1 + .../recalibration/RecalibrationWalkersIntegrationTest.java | 1 - .../sting/utils/codecs/vcf/VCFIntegrationTest.java | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index a580a1240..d85d45719 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -91,6 +91,7 @@ public class ParsingEngine { protected static Logger logger = Logger.getLogger(ParsingEngine.class); public ParsingEngine( CommandLineProgram clp ) { + RodBinding.resetNameCounter(); parsingMethods.add( ParsingMethod.FullNameParsingMethod ); parsingMethods.add( ParsingMethod.ShortNameParsingMethod ); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 469425dcd..f87b43dfa 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -31,7 +31,6 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @DataProvider(name = "cctestdata") public Object[][] createCCTestData() { - new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "5a52b00d9794d27af723bcf93366681e" ); new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941"); new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" ); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index ae64ba6f8..a89c0315c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -17,7 +17,7 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; - String test1 = baseCommand + "-T VariantAnnotator -BTI variants --variants:vcf " + testVCF; + String test1 = baseCommand + "-T VariantAnnotator --variants:vcf " + testVCF + " -BTI variants"; WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); From 21dc9a55434eee8870956d504cfc0cc4a3167124 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 4 Aug 2011 12:31:28 -0400 Subject: [PATCH 112/186] Adding mills/devine indel dataset to the resource bundle --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 934cf2a3c..4f1fe741a 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -134,6 +134,9 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf", "1000G_indels_for_realignment", b37, true, false)) + addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf", + "indels_mills_devine", b37, true, true)) + // // example call set for wiki tutorial // From f10588420c11e8db007a2c975e61206e68a71299 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 4 Aug 2011 12:36:24 -0400 Subject: [PATCH 113/186] Fixing path to dbSNP file as the other one was replaced --- .../indels/RealignerTargetCreatorIntegrationTest.java | 2 +- .../RecalibrationWalkersIntegrationTest.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index f5ed69476..60312dbd2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -17,7 +17,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test standard", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", + "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_129_b36.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); executeTest("test dbsnp", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 049f44845..e81d2670c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -30,7 +30,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) @@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf", 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -144,7 +144,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -249,7 +249,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -275,7 +275,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + From d2078f09b2b8687db1f990da23c9f0d43e6c1b85 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 12:47:55 -0400 Subject: [PATCH 114/186] Minor fixes to ITs --- .../recalibration/RecalibrationWalkersIntegrationTest.java | 2 +- .../gatk/walkers/varianteval/VariantEvalIntegrationTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index f87b43dfa..74f803ac6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -312,7 +312,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,vcf " + + " -B:dbsnp,vcf " + b36dbSNP129 + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 38a852eb5..057491e1e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -295,7 +295,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("79089484097614b7ab81bbc3ad3a892a")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("79895904a3c2799e2e384a933d10f3fc")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } From e48492f3c3fb0621c894195cecc0d621620f7b2d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 4 Aug 2011 12:48:56 -0400 Subject: [PATCH 115/186] Validate that the reference padding base for indels is correct. --- .../gatk/walkers/variantutils/ValidateVariants.java | 4 ++-- .../sting/utils/variantcontext/VariantContext.java | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 0644c669b..0de405d97 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -154,10 +154,10 @@ public class ValidateVariants extends RodWalker { try { switch( type ) { case ALL: - vc.extraStrictValidation(observedRefAllele, rsIDs); + vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); break; case REF: - vc.validateReferenceBases(observedRefAllele); + vc.validateReferenceBases(observedRefAllele, ref.getBase()); break; case IDS: vc.validateRSIDs(rsIDs); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 1712f6f7b..fff1961c6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1055,11 +1055,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati * Run all extra-strict validation tests on a Variant Context object * * @param reference the true reference allele + * @param paddedRefBase the reference base used for padding indels * @param rsIDs the true dbSNP IDs */ - public void extraStrictValidation(Allele reference, Set rsIDs) { + public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) { // validate the reference - validateReferenceBases(reference); + validateReferenceBases(reference, paddedRefBase); // validate the RS IDs validateRSIDs(rsIDs); @@ -1074,11 +1075,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati //checkReferenceTrack(); } - public void validateReferenceBases(Allele reference) { + public void validateReferenceBases(Allele reference, Byte paddedRefBase) { // don't validate if we're an insertion if ( !reference.isNull() && !reference.basesMatch(getReference()) ) { throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); } + + // we also need to validate the padding base for simple indels + if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) + throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), (char)getReferenceBaseForIndel().byteValue(), (char)paddedRefBase.byteValue())); } public void validateRSIDs(Set rsIDs) { From 98a96f07c163043586826b4d511f2efdc91bca1c Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 4 Aug 2011 14:06:26 -0400 Subject: [PATCH 116/186] Updated standard deviation parameter in VQSR to our current recommended value --- .../VariantRecalibratorArgumentCollection.java | 2 +- .../VariantRecalibrationWalkersIntegrationTest.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java index e1a815913..28b279ccd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java @@ -50,7 +50,7 @@ public class VariantRecalibratorArgumentCollection { @Argument(fullName="numKMeans", shortName="nKM", doc="The number of k-means iterations to perform in order to initialize the means of the Gaussians in the Gaussian mixture model.", required=false) public int NUM_KMEANS_ITERATIONS = 30; @Argument(fullName="stdThreshold", shortName="std", doc="If a variant has annotations more than -std standard deviations away from mean then don't use it for building the Gaussian mixture model.", required=false) - public double STD_THRESHOLD = 8.0; + public double STD_THRESHOLD = 14.0; @Argument(fullName="qualThreshold", shortName="qual", doc="If a known variant has raw QUAL value less than -qual then don't use it for building the Gaussian mixture model.", required=false) public double QUAL_THRESHOLD = 80.0; @Argument(fullName="shrinkage", shortName="shrinkage", doc="The shrinkage parameter in variational Bayes algorithm.", required=false) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 057053a1c..3ac7e3785 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -26,9 +26,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", - "d33212a84368e821cbedecd4f59756d6", // tranches - "4652dca41222bebdf9d9fda343b2a835", // recal file - "243a397a33a935fcaccd5deb6d16f0c0"); // cut VCF + "0ddd1e0e483d2eaf56004615cea23ec7", // tranches + "58780f63182e139fdbe17f6c18b5b774", // recal file + "f67d844b6252a55452cf4167b77530b1"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { From a8eb8c27f037675369a768eb24489da582d7e274 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 4 Aug 2011 15:34:49 -0400 Subject: [PATCH 117/186] a) Minor changes to indel consensus scripts to better reflect good default values, b) Fixed up Mills/Devine codec so it always produces correct ref padded bases, and added option to VariantsToVCF to fix reference base --- .../walkers/variantrecalibration/VariantDataManager.java | 8 ++++++++ .../sting/gatk/walkers/variantutils/VariantsToVCF.java | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 7426a7726..b7f71c1ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -240,6 +240,14 @@ public class VariantDataManager { if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) { + // normalize QD by event length for indel case + int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now + if (eventLength > 0) // sanity check + value /= (double)eventLength; + + } + if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( Exception e ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index c9b63878d..2afa315ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -67,6 +67,9 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) protected String sampleName = null; + @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) + protected boolean fixReferenceBase = false; + private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; @@ -104,6 +107,10 @@ public class VariantsToVCF extends RodWalker { vc = VariantContext.modifyGenotypes(vc, genotypes); } + // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding + if (fixReferenceBase) { + vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); + } writeRecord(vc, tracker, ref.getBase()); } From 9be1ee59cc64ebab682066eb3713d5c4421185f1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 18:07:50 -0400 Subject: [PATCH 118/186] TODO comments for Eric --- .../walkers/varianteval/VariantEvalWalker.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 9e6ffec28..c26729ed3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -231,6 +231,22 @@ public class VariantEvalWalker extends RodWalker implements Tr for ( String sampleName : sampleNamesForStratification ) { VariantContext eval = vcs.containsKey(evalName) && vcs.get(evalName) != null ? vcs.get(evalName).get(sampleName) : null; + // todo: Eric, this is really the problem. We select single eval and comp VCs independently + // todo: discarding multiple eval tracks at the sites and not providing matched comps + // todo: where appropriate. Really this loop should look like: + // todo: for each eval track: + // todo: for each eval in track: + // todo: for each compTrack: + // todo: comp = findMatchingComp(eval, compTrack) // find the matching comp in compTrack + // todo: call evalModule(eval, comp) + // todo: // may return null if no such comp exists, but proceed as eval modules may need to see eval / null pair + // todo: for each comp not matched by an eval in compTrack: + // todo: call evalModule(null, comp) + // todo: // need to call with null comp, as module + // todo: note that the reason Kiran pre-computed the possible VCs is to apply the modifiers + // todo: like subset to sample, etc. So you probably will want a master map that maps + // todo: from special eval bindings to the digested VC for efficiency. + if ( typesToUse != null ) { if ( eval != null && ! typesToUse.contains(eval.getType()) ) eval = null; if ( comp != null && ! typesToUse.contains(comp.getType()) ) comp = null; From 9308fbe3fb9f707fecfd27063c580de19a1a598e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 18:08:47 -0400 Subject: [PATCH 119/186] VariantEval Integration Test parameterized for new novelty stratification --- .../varianteval/VariantEvalIntegrationTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 057491e1e..8fa5f0c29 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -236,7 +236,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("db95c8af8ba549d38ca6741a59fd6892")); + 1, Arrays.asList("14054badcd89b24c2375e1d09918f681")); executeTestParallel("testSelect1", spec); } @@ -283,7 +283,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:dbsnp,VCF " + b37dbSNP132 + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("61c36fb6cc75172e2b22a44edeae85e0")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0897dfba2f4a245faddce38000555cce")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -295,7 +295,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("79895904a3c2799e2e384a933d10f3fc")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ead3602e14ec2944b5d9e4dacc08c819")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -312,13 +312,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9f906c04a4553d649b51ae67e0a25113")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("167a347ce0729d1bc3d4fd5069ebd674")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "97a16a99a43d2384cfabc39d36647419"; + String md5 = "40471a84b501eb440ee2d42e3081f228"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( From f21f7f633572dccd7d4d34f46dcf67f1925d9338 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 18:28:59 -0400 Subject: [PATCH 120/186] SelectVariants fully documented, now the shining example of the new RodBinding system. --- .../walkers/variantutils/SelectVariants.java | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 063b005a6..6776798c3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -55,9 +55,35 @@ import java.util.*; */ @Requires(value={}) public class SelectVariants extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + /** + * The VCF file we are selecting variants from. + * + * Variants from this file are sent through the filtering and modifying routines as directed + * by the arguments to SelectVariants, and finally are emitted. + */ + @Input(fullName="variants", shortName = "V", doc="Select variants from this VCF file", required=true) public RodBinding variants; + /** + * If provided, we will filter out variants that are "discordant" to the variants in this file + * + * A site is considered discordant if there exists some sample in eval that has a non-reference genotype + * and either the site isn't present in this track, the sample isn't present in this track, + * or the sample is called reference in this track. + */ + @Input(fullName="discordance", shortName = "disc", doc="Output variants that were not called in this Feature comparison track", required=false) + private RodBinding discordanceTrack = RodBinding.makeUnbound(VariantContext.class); + + /** + * If provided, we will filter out any variant in variants that isn't "concordant" with the variants in this track. + * + * A site is considered concordant if (1) we are not looking for specific samples and there is a variant called + * in both variants and concordance tracks or (2) every sample present in eval is present in the concordance + * track and they have the sample genotype call. + */ + @Input(fullName="concordance", shortName = "conc", doc="Output variants that were also called in this Feature comparison track", required=false) + private RodBinding concordanceTrack = RodBinding.makeUnbound(VariantContext.class); + @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -82,12 +108,6 @@ public class SelectVariants extends RodWalker { @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; - @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track", required=false) - private RodBinding discordanceTrack = RodBinding.makeUnbound(VariantContext.class); - - @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track", required=false) - private RodBinding concordanceTrack = RodBinding.makeUnbound(VariantContext.class); - @Hidden @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) private boolean KEEP_AF_SPECTRUM = false; From d7f98e5c2a174578ff4b967f9500631201f6d5ca Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 18:48:34 -0400 Subject: [PATCH 121/186] Fixed merge conflict deleting a { --- .../sting/gatk/walkers/variantutils/VariantsToVCF.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index e3511ab11..07c5e71a6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -112,11 +112,13 @@ public class VariantsToVCF extends RodWalker { vc = VariantContext.modifyGenotypes(vc, genotypes); } - // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding - if (fixReferenceBase) { - vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); + // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding + if (fixReferenceBase) { + vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); + } + + writeRecord(vc, tracker, ref.getBase()); } - writeRecord(vc, tracker, ref.getBase()); } return 1; From 14e43c3382be946eb8337c5fe04256ab38847cc5 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 21:52:39 -0400 Subject: [PATCH 122/186] Final fix to RodBindingUnitTest to reset global counter variable --- .../broadinstitute/sting/commandline/RodBindingUnitTest.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java index a32157a41..9a20ba880 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java @@ -35,6 +35,11 @@ import org.testng.annotations.Test; public class RodBindingUnitTest extends BaseTest { Tags mytags = new Tags(); + @BeforeMethod + public void setUp() { + RodBinding.resetNameCounter(); + } + @Test public void testStandardRodBinding() { RodBinding b = new RodBinding(VariantContext.class, "b", "foo", "vcf", mytags); From 573700d18d0ff7538a84fd3017dbc7c3336281f4 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 4 Aug 2011 21:57:00 -0400 Subject: [PATCH 123/186] Adding missing import --- .../org/broadinstitute/sting/commandline/RodBindingUnitTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java index 9a20ba880..206f32532 100644 --- a/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/RodBindingUnitTest.java @@ -28,6 +28,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.Test; +import org.testng.annotations.BeforeMethod; /** * Test suite for the parsing engine. From eaa2f16d837aeec9ad6d4e35d6ff9607972f7c89 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Sat, 6 Aug 2011 10:42:04 -0400 Subject: [PATCH 124/186] When a job finishes successfully in the ShellJobRunner, mark it as DONE instead of FAILED. --- .../sting/queue/engine/shell/ShellJobRunner.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala index 128d8773c..03f9d3315 100755 --- a/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/engine/shell/ShellJobRunner.scala @@ -52,7 +52,7 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu updateStatus(RunnerStatus.RUNNING) job.run() - updateStatus(RunnerStatus.FAILED) + updateStatus(RunnerStatus.DONE) } override def checkUnknownStatus() {} From f049461120b0f333964011a5e1ad20cc93f61992 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Sat, 6 Aug 2011 20:44:19 -0400 Subject: [PATCH 125/186] Changed @Argument to @Input on input RodBindings. Changed shortname collision with longname. Restored scala builds. Updated HSP to use new syntax. --- build.xml | 12 ++++++------ .../sting/gatk/walkers/PileupWalker.java | 3 ++- .../walkers/filters/VariantFiltrationWalker.java | 2 +- .../gatk/walkers/variantutils/SelectVariants.java | 4 ++-- .../java/test/org/broadinstitute/sting/BaseTest.java | 1 + 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/build.xml b/build.xml index 9af8949ba..438e9c90c 100644 --- a/build.xml +++ b/build.xml @@ -168,7 +168,7 @@ - + @@ -973,11 +973,11 @@ - - - - - + + + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java index e998c8452..6243a6cc0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PileupWalker.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers; import org.broad.tribble.Feature; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -71,7 +72,7 @@ public class PileupWalker extends LocusWalker implements TreeR @Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events") public boolean SHOW_INDEL_PILEUPS = false; - @Argument(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) + @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false) public List> rods = Collections.emptyList(); public void initialize() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index d8e6ad227..0daabfa45 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -78,7 +78,7 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false) protected Integer MASK_EXTEND = 0; - @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) + @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) protected String MASK_NAME = "Mask"; @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, should missing values be considered failing the expression (by default they are considered passing)?", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 063b005a6..a74349727 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -82,10 +82,10 @@ public class SelectVariants extends RodWalker { @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; - @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track", required=false) + @Input(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track", required=false) private RodBinding discordanceTrack = RodBinding.makeUnbound(VariantContext.class); - @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track", required=false) + @Input(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track", required=false) private RodBinding concordanceTrack = RodBinding.makeUnbound(VariantContext.class); @Hidden diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index e864e5754..a95f044ec 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -65,6 +65,7 @@ public abstract class BaseTest { public static final String dbsnpDataLocation = GATKDataLocation; public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf"; + public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.vcf"; public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf"; public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/"; From b0e91f85cfa35265024392dd022fecbef9fa9330 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 10:33:20 -0400 Subject: [PATCH 126/186] fix merge from Khalid's Queue fix --- .../sting/gatk/walkers/variantutils/SelectVariants.java | 6 ------ public/java/test/org/broadinstitute/sting/BaseTest.java | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 3e7b5d2bd..6776798c3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -108,12 +108,6 @@ public class SelectVariants extends RodWalker { @Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't include filtered loci.", required=false) private boolean KEEP_ORIGINAL_CHR_COUNTS = false; - @Argument(fullName="discordance", shortName = "disc", doc="Output variants that were not called on a ROD comparison track", required=false) - private RodBinding discordanceTrack = RodBinding.makeUnbound(VariantContext.class); - - @Argument(fullName="concordance", shortName = "conc", doc="Output variants that were also called on a ROD comparison track", required=false) - private RodBinding concordanceTrack = RodBinding.makeUnbound(VariantContext.class); - @Hidden @Argument(fullName="keepAFSpectrum", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) private boolean KEEP_AF_SPECTRUM = false; diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index e864e5754..5e46f8f6f 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -64,7 +64,7 @@ public abstract class BaseTest { public static final String b37Refseq = refseqAnnotationLocation + "refGene-big-table-b37.txt"; public static final String dbsnpDataLocation = GATKDataLocation; - public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf"; + public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.vcf"; public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf"; public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/"; From ece8f0db5e7d8aac29c396e80bc59bccda2259e7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 11:26:07 -0400 Subject: [PATCH 127/186] Added b37dbSNP129, needed for Queue --- public/java/test/org/broadinstitute/sting/BaseTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index 5e46f8f6f..7a749c0a2 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -65,6 +65,7 @@ public abstract class BaseTest { public static final String dbsnpDataLocation = GATKDataLocation; public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.vcf"; + public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.vcf"; public static final String b37dbSNP132 = dbsnpDataLocation + "dbsnp_132_b37.vcf"; public static final String hapmapDataLocation = comparisonDataLocation + "Validated/HapMap/3.3/"; From 1d8b1bae0a77a50ac00d9dc89a88ee47baf6cceb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 13:32:26 -0400 Subject: [PATCH 129/186] Need to rename the integration test argument -mask to -maskName --- .../walkers/filters/VariantFiltrationIntegrationTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 05c0c0982..f613407e2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -31,17 +31,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testMasks() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -mask foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("578f9e774784c25871678e6464fd212b")); executeTest("test mask all", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -mask foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); executeTest("test mask some", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( - baseTestString() + " -mask foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("5939f80d14b32d88587373532d7b90e5")); executeTest("test mask extend", spec3); } From 1c63d43176458f0052254af45ffb7670b853bd52 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 15:02:46 -0400 Subject: [PATCH 130/186] Help now points to GATKDocs instead of spitting out full, garbled description --- .../sting/gatk/CommandLineGATK.java | 11 ++--- .../help/DocumentedGATKFeatureHandler.java | 4 +- .../sting/utils/help/GATKDocWorkUnit.java | 48 ++++++++++++++----- .../sting/utils/help/GATKDoclet.java | 20 +++++++- 4 files changed, 59 insertions(+), 24 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 2af29ea70..f8c84116d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.GATKDoclet; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; @@ -175,12 +176,8 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); - formatter.format("Description:%n"); - - WalkerManager walkerManager = engine.getWalkerManager(); - String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType); - - printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH); + formatter.format("For a full description of this walker, see its GATKdocs at:%n"); + formatter.format("%s%n", GATKDoclet.helpLinksToGATKDocs(walkerType)); return additionalHelp.toString(); } @@ -194,8 +191,6 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); - formatter.format("Available analyses:%n"); - // Get the list of walker names from the walker manager. WalkerManager walkerManager = engine.getWalkerManager(); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index 366df0c3a..c561ea54b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -50,8 +50,8 @@ public abstract class DocumentedGATKFeatureHandler { public boolean shouldBeProcessed(ClassDoc doc) { return true; } - public String getDestinationFilename(ClassDoc doc) { - return HelpUtils.getClassName(doc).replace(".", "_") + ".html"; + public String getDestinationFilename(ClassDoc doc, Class clazz) { + return GATKDoclet.htmlFilenameForClass(clazz); } public abstract String getTemplateName(ClassDoc doc) throws IOException; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java index 65c6624d5..1f6db2757 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java @@ -30,19 +30,29 @@ import java.util.HashMap; import java.util.Map; /** -* Created by IntelliJ IDEA. -* User: depristo -* Date: 7/24/11 -* Time: 7:59 PM -* To change this template use File | Settings | File Templates. -*/ -public class GATKDocWorkUnit implements Comparable { - // known at the start - final String name, filename, group; - final DocumentedGATKFeatureHandler handler; - final ClassDoc classDoc; + * Simple collection of all relevant information about something the GATKDoclet can document + * + * Created by IntelliJ IDEA. + * User: depristo + * Date: 7/24/11 + * Time: 7:59 PM + */ +class GATKDocWorkUnit implements Comparable { + /** The class that's being documented */ final Class clazz; + /** The name of the thing we are documenting */ + final String name; + /** the filename where we will be writing the docs for this class */ + final String filename; + /** The name of the documentation group (e.g., walkers, read filters) class belongs to */ + final String group; + /** The documentation handler for this class */ + final DocumentedGATKFeatureHandler handler; + /** The javadoc documentation for clazz */ + final ClassDoc classDoc; + /** The annotation that lead to this Class being in GATKDoc */ final DocumentedGATKFeature annotation; + /** When was this walker built, and what's the absolute version number */ final String buildTimestamp, absoluteVersion; // set by the handler @@ -64,12 +74,21 @@ public class GATKDocWorkUnit implements Comparable { this.absoluteVersion = absoluteVersion; } + /** + * Called by the GATKDoclet to set handler provided context for this work unit + * @param summary + * @param forTemplate + */ public void setHandlerContent(String summary, Map forTemplate) { this.summary = summary; this.forTemplate = forTemplate; } - public Map toMap() { + /** + * Return a String -> String map suitable for FreeMarker to create an index to this WorkUnit + * @return + */ + public Map indexDataMap() { Map data = new HashMap(); data.put("name", name); data.put("summary", summary); @@ -78,6 +97,11 @@ public class GATKDocWorkUnit implements Comparable { return data; } + /** + * Sort in order of the name of this WorkUnit + * @param other + * @return + */ public int compareTo(GATKDocWorkUnit other) { return this.name.compareTo(other.name); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 49214237a..2647b6983 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -42,6 +42,9 @@ import java.util.*; * */ public class GATKDoclet { + private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; + private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; + private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); final protected static File DESTINATION_DIR = new File("gatkdocs"); final protected static Logger logger = Logger.getLogger(GATKDoclet.class); @@ -89,6 +92,19 @@ public class GATKDoclet { return showHiddenFeatures; } + public static String htmlFilenameForClass(Class c) { + return c.getName().replace(".", "_") + ".html"; + } + + public static String helpLinksToGATKDocs(Class c) { + String classPath = htmlFilenameForClass(c); + StringBuilder b = new StringBuilder(); + b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); + b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); + b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); + return b.toString(); + } + public Set workUnits() { TreeSet m = new TreeSet(); @@ -103,7 +119,7 @@ public class GATKDoclet { DocumentedGATKFeatureHandler handler = createHandler(doc, feature); if ( handler != null && handler.shouldBeProcessed(doc) ) { logger.info("Going to generate documentation for class " + doc); - String filename = handler.getDestinationFilename(doc); + String filename = handler.getDestinationFilename(doc, clazz); GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), filename, feature.groupName(), feature, handler, doc, clazz, @@ -220,7 +236,7 @@ public class GATKDoclet { Set docFeatures = new HashSet(); List> data = new ArrayList>(); for ( GATKDocWorkUnit workUnit : indexData ) { - data.add(workUnit.toMap()); + data.add(workUnit.indexDataMap()); docFeatures.add(workUnit.annotation); } From 5f8bc3aa8adba5cce015027a016c96066ff3dd82 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 15:17:50 -0400 Subject: [PATCH 131/186] Documenting classes, and name cleanup --- .../help/DocumentedGATKFeatureHandler.java | 45 ++++++++++++++++++- .../sting/utils/help/GATKDoclet.java | 2 +- .../help/GenericDocumentationHandler.java | 2 +- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index c561ea54b..44ad308b3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -31,29 +31,70 @@ import java.io.*; import java.util.Set; /** - * + * Extend this class to provide a documentation handler for GATKdocs */ public abstract class DocumentedGATKFeatureHandler { private GATKDoclet doclet; + /** + * @return the javadoc RootDoc of this javadoc run + */ protected RootDoc getRootDoc() { return this.doclet.rootDoc; } + /** Set the master doclet driving this handler */ public void setDoclet(GATKDoclet doclet) { this.doclet = doclet; } + /** + * @return the GATKDoclet driving this documentation run + */ public GATKDoclet getDoclet() { return doclet; } - public boolean shouldBeProcessed(ClassDoc doc) { return true; } + /** + * Should return false iff this handler wants GATKDoclet to skip documenting + * this ClassDoc. + * @param doc that is being considered for inclusion in the docs + * @return true if the doclet should document ClassDoc doc + */ + public boolean includeInDocs(ClassDoc doc) { return true; } + /** + * Return the flat filename (no paths) that the handler would like the Doclet to + * write out the documentation for ClassDoc doc and its associated Class clazz + * @param doc + * @param clazz + * @return + */ public String getDestinationFilename(ClassDoc doc, Class clazz) { return GATKDoclet.htmlFilenameForClass(clazz); } + /** + * Return the name of the FreeMarker template we will use to process ClassDoc doc. + * + * Note this is a flat filename relative to settings/helpTemplates in the GATK source tree + * @param doc + * @return + * @throws IOException + */ public abstract String getTemplateName(ClassDoc doc) throws IOException; + + /** + * Actually generate the documentation map associated with toProcess + * + * Can use all to provide references and rootDoc for additional information, if necessary. + * Implementing methods should end with a call to setHandlerContext on toProcess, as in: + * + * toProcess.setHandlerContent(summary, rootMap); + * + * @param rootDoc + * @param toProcess + * @param all + */ public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set all); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 2647b6983..24bbcf1fb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -117,7 +117,7 @@ public class GATKDoclet { DocumentedGATKFeature feature = getFeatureForClassDoc(doc); DocumentedGATKFeatureHandler handler = createHandler(doc, feature); - if ( handler != null && handler.shouldBeProcessed(doc) ) { + if ( handler != null && handler.includeInDocs(doc) ) { logger.info("Going to generate documentation for class " + doc); String filename = handler.getDestinationFilename(doc, clazz); GATKDocWorkUnit unit = new GATKDocWorkUnit(doc.name(), diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index c69345816..3ca24dc35 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -51,7 +51,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { RootDoc rootDoc; @Override - public boolean shouldBeProcessed(ClassDoc doc) { + public boolean includeInDocs(ClassDoc doc) { return true; // try { // Class type = HelpUtils.getClassForDoc(doc); From 526b524c3c95b7c3185291087debc936463efdbb Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Sun, 7 Aug 2011 20:16:51 -0400 Subject: [PATCH 132/186] CombineVariants with new RodBinding. Bugfix -- CombineVariants now uses the new RodBinding syntax, -V / --variants. Passed all integration tests on first run -- Exposed gapping bug in the List> system now fixed. ParserEngine now has a addRodBinding() that is called by RodBindingArgumentTypeDescriptor when it encounters each RodBinding. This allows the system to work with collection types that are recursively parsed by the system. --- .../commandline/ArgumentTypeDescriptor.java | 1 + .../sting/commandline/ParsingEngine.java | 16 +++++-- .../walkers/variantutils/CombineVariants.java | 45 ++++++++++--------- .../CombineVariantsIntegrationTest.java | 21 +++++---- 4 files changed, 48 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 0882f5385..622576747 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -330,6 +330,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); parsingEngine.addTags(result,tags); + parsingEngine.addRodBinding(result); return result; } catch (InvocationTargetException e) { throw new UserException.CommandLineException( diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index d85d45719..9b543142b 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.commandline; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -330,7 +331,17 @@ public class ParsingEngine { if(!tags.containsKey(key)) return new Tags(); return tags.get(key); - } + } + + /** + * Add a RodBinding type argument to this parser. Called during parsing to allow + * us to track all of the RodBindings discovered in the command line. + * @param rodBinding the rodbinding to add. Must not be added twice + */ + @Requires("rodBinding != null") + public void addRodBinding(final RodBinding rodBinding) { + rodBindings.add(rodBinding); + } /** * Notify the user that a deprecated command-line argument has been used. @@ -367,9 +378,6 @@ public class ParsingEngine { Object value = (argumentMatches.size() != 0) ? source.parse(this,argumentMatches) : source.createTypeDefault(this); JVMUtils.setFieldValue(source.field,target,value); - - if ( value instanceof RodBinding ) - rodBindings.add((RodBinding)value); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index e918d5ce8..7905c2c32 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; @@ -52,8 +50,23 @@ import java.util.*; * priority list (if provided), emits a single record instance at every position represented in the rods. */ @Reference(window=@Window(start=-50,stop=50)) -@Requires(value={}) public class CombineVariants extends RodWalker { + /** + * The VCF files to merge together + * + * variants can take any number of arguments on the command line. Each -V argument + * will be included in the final merged output VCF. If no explicit name is provided, + * the -V arguments will be named using the default algorithm: variants, variants2, variants3, etc. + * The user can override this by providing an explicit name -V:name,vcf for each -V argument, + * and each named argument will be labeled as such in the output (i.e., set=name rather than + * set=variants2). The order of arguments does not matter unless except for the naming, so + * if you provide an rod priority list and no explicit names than variants, variants2, etc + * are techincally order dependent. It is strongly recommended to provide explicit names when + * a rod priority list is provided. + */ + @Input(fullName = "variants", shortName = "V", doc="The VCF files to merge together", required=true) + public List> variantsToMerge; + @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -85,10 +98,6 @@ public class CombineVariants extends RodWalker { @Argument(fullName="minimumN", shortName="minN", doc="Combine variants and output site only if variant is present in at least N input files.", required=false) public int minimumN = 1; - @Hidden - @Argument(fullName="masterMerge", shortName="master", doc="Master merge mode -- experts only. You need to look at the code to understand it", required=false) - public boolean master = false; - @Hidden @Argument(fullName="mergeInfoWithMaxAC", shortName="mergeInfoWithMaxAC", doc="If true, when VCF records overlap the info field is taken from the one with the max AC instead of only taking the fields which are identical across the overlapping records.", required=false) public boolean MERGE_INFO_WITH_MAX_AC = false; @@ -148,7 +157,7 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getValues(VariantContext.class, context.getLocation()); + Collection vcs = tracker.getValues(variantsToMerge, context.getLocation()); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); @@ -172,17 +181,13 @@ public class CombineVariants extends RodWalker { return 0; List mergedVCs = new ArrayList(); - if ( master ) { - mergedVCs.add(VariantContextUtils.masterMerge(vcs, "master")); - } else { - Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); - // iterate over the types so that it's deterministic - for ( VariantContext.Type type : VariantContext.Type.values() ) { - if ( VCsByType.containsKey(type) ) - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), - priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, - SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); - } + Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); + // iterate over the types so that it's deterministic + for ( VariantContext.Type type : VariantContext.Type.values() ) { + if ( VCsByType.containsKey(type) ) + mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } for ( VariantContext mergedVC : mergedVCs ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 9b152bc71..d27ab34a0 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -44,7 +44,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { public void test1InOut(String file, String md5, String args, boolean vcf3) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -priority v1 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), + baseTestString(" -priority v1 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), 1, Arrays.asList(md5)); executeTest("testInOut1--" + file, spec); @@ -52,7 +52,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -priority v1,v2 -B:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -B:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), + baseTestString(" -priority v1,v2 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -V:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), 1, Arrays.asList(md5)); executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); @@ -63,8 +63,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { String file2 = "hapmap_3.3.b37.sites.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference - + " -L 1:1-10,000,000 -B:omni,VCF " + validationDataLocation + file1 - + " -B:hm3,VCF " + validationDataLocation + file2 + args, + + " -L 1:1-10,000,000 -V:omni,VCF " + validationDataLocation + file1 + + " -V:hm3,VCF " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); @@ -91,10 +91,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -B:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + - " -B:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + - " -B:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + - " -B:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + + baseTestString(" -V:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + + " -V:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + + " -V:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + + " -V:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + @@ -104,15 +104,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("threeWayWithRefs", spec); } - // complex examples with filtering, indels, and multiple alleles public void combineComplexSites(String args, String md5) { String file1 = "combine.1.vcf"; String file2 = "combine.2.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference - + " -B:one,VCF " + validationDataLocation + file1 - + " -B:two,VCF " + validationDataLocation + file2 + args, + + " -V:one,VCF " + validationDataLocation + file1 + + " -V:two,VCF " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); From 4d6cb33612a1233e17584726cee216526b6ad77c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 8 Aug 2011 10:49:28 -0400 Subject: [PATCH 134/186] removing temporary bam index The clean bai file was left behind after the data processing pipeline was done --- .../sting/queue/qscripts/DataProcessingPipeline.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 116d16f35..47ba0220f 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -313,6 +313,7 @@ class DataProcessingPipeline extends QScript { } case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input_file :+= inBams this.targetIntervals = tIntervals this.out = outBam From 0db79207e87ac563ce0b7629936c053de8313b48 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 12:27:13 -0400 Subject: [PATCH 135/186] Refactored dependancy from CommandLineGATK from javadocs This allows us to run the GATK again in environments without Javadoc loading by default in the classpath --- .../sting/gatk/CommandLineGATK.java | 3 +- .../help/DocumentedGATKFeatureHandler.java | 2 +- .../sting/utils/help/GATKDocUtils.java | 48 +++++++++++++++++++ .../sting/utils/help/GATKDoclet.java | 16 ------- 4 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index f8c84116d..7e96b609e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.GATKDocUtils; import org.broadinstitute.sting.utils.help.GATKDoclet; import org.broadinstitute.sting.utils.text.TextFormattingUtils; @@ -177,7 +178,7 @@ public class CommandLineGATK extends CommandLineExecutable { Formatter formatter = new Formatter(additionalHelp); formatter.format("For a full description of this walker, see its GATKdocs at:%n"); - formatter.format("%s%n", GATKDoclet.helpLinksToGATKDocs(walkerType)); + formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType)); return additionalHelp.toString(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index 44ad308b3..ce03c8093 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -71,7 +71,7 @@ public abstract class DocumentedGATKFeatureHandler { * @return */ public String getDestinationFilename(ClassDoc doc, Class clazz) { - return GATKDoclet.htmlFilenameForClass(clazz); + return GATKDocUtils.htmlFilenameForClass(clazz); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java new file mode 100644 index 000000000..8efeecd7b --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +/** + * @author depristo + * @since 8/8/11 + */ +public class GATKDocUtils { + private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; + private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; + private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; + + public static String htmlFilenameForClass(Class c) { + return c.getName().replace(".", "_") + ".html"; + } + + public static String helpLinksToGATKDocs(Class c) { + String classPath = htmlFilenameForClass(c); + StringBuilder b = new StringBuilder(); + b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); + b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); + b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); + return b.toString(); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 24bbcf1fb..f278e593d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -42,9 +42,6 @@ import java.util.*; * */ public class GATKDoclet { - private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; - private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; - private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); final protected static File DESTINATION_DIR = new File("gatkdocs"); final protected static Logger logger = Logger.getLogger(GATKDoclet.class); @@ -92,19 +89,6 @@ public class GATKDoclet { return showHiddenFeatures; } - public static String htmlFilenameForClass(Class c) { - return c.getName().replace(".", "_") + ".html"; - } - - public static String helpLinksToGATKDocs(Class c) { - String classPath = htmlFilenameForClass(c); - StringBuilder b = new StringBuilder(); - b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); - b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); - b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); - return b.toString(); - } - public Set workUnits() { TreeSet m = new TreeSet(); From 8072bd9831e0000bf49dbcf6ba689068765789f3 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 8 Aug 2011 12:35:39 -0400 Subject: [PATCH 136/186] Updating resource bundle generation qscript for changeover to git --- .../queue/qscripts/GATKResourcesBundle.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 4f1fe741a..59c00b8cd 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -13,7 +13,7 @@ class GATKResourcesBundle extends QScript { var gatkJarFile: File = new File("dist/GenomeAnalysisTK.jar") @Argument(doc="liftOverPerl", required=false) - var liftOverPerl: File = new File("./perl/liftOverVCF.pl") + var liftOverPerl: File = new File("./public/perl/liftOverVCF.pl") @Argument(shortName = "ver", doc="The SVN version of this release", required=true) var VERSION: String = _ @@ -57,11 +57,11 @@ class GATKResourcesBundle extends QScript { //Console.printf("liftover(%s => %s)%n", inRef.name, outRef.name) (inRef.name, outRef.name) match { case ("b37", "hg19") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tohg19.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg19.chain"), inRef, outRef) case ("b37", "hg18") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tohg18.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tohg18.chain"), inRef, outRef) case ("b37", "b36") => - return new LiftOverPerl(in, out, new File("chainFiles/b37tob36.chain"), inRef, outRef) + return new LiftOverPerl(in, out, new File("public/chainFiles/b37tob36.chain"), inRef, outRef) case _ => return null } } @@ -85,7 +85,7 @@ class GATKResourcesBundle extends QScript { // b37 = new Reference("b37", new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/Users/depristo/Desktop/broadLocal/localData/Homo_sapiens_assembly18.fasta")) - exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) + exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta")) refs = List(b37, hg18, exampleFASTA) val DATAROOT = "/Users/depristo/Desktop/broadLocal/localData/" @@ -94,7 +94,7 @@ class GATKResourcesBundle extends QScript { addResource(new Resource(DATAROOT + "dbsnp_132_b37.vcf", "dbsnp_132", b37, true, false)) addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) } def initializeStandardDataFiles() = { @@ -105,7 +105,7 @@ class GATKResourcesBundle extends QScript { b37 = new Reference("b37", new File("/humgen/1kg/reference/human_g1k_v37.fasta")) hg18 = new Reference("hg18", new File("/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")) b36 = new Reference("b36", new File("/humgen/1kg/reference/human_b36_both.fasta")) - exampleFASTA = new Reference("exampleFASTA", new File("testdata/exampleFASTA.fasta")) + exampleFASTA = new Reference("exampleFASTA", new File("public/testdata/exampleFASTA.fasta")) refs = List(hg19, b37, hg18, b36, exampleFASTA) addResource(new Resource(b37.file, "", b37, false)) @@ -155,8 +155,8 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/refGene_b37.sorted.txt", "refGene", b37, true, false)) - addResource(new Resource("chainFiles/hg18tob37.chain", "", hg18, false, false)) - addResource(new Resource("chainFiles/b36tob37.chain", "", b36, false, false)) + addResource(new Resource("public/chainFiles/hg18tob37.chain", "", hg18, false, false)) + addResource(new Resource("public/chainFiles/b36tob37.chain", "", b36, false, false)) // todo -- chain files? // todo 1000G SNP and indel call sets? @@ -165,7 +165,7 @@ class GATKResourcesBundle extends QScript { // exampleFASTA file // addResource(new Resource(exampleFASTA.file, "exampleFASTA", exampleFASTA, false)) - addResource(new Resource("testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) + addResource(new Resource("public/testdata/exampleBAM.bam", "exampleBAM", exampleFASTA, false)) } def createBundleDirectories(dir: File) = { From c1061e994ceb0b0f2b8b6c25193a3ffce202c4c6 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 4 Aug 2011 19:36:26 -0400 Subject: [PATCH 138/186] Initial support for adding genomic annotations through VariantAnnotator using the output from the SnpEff tool, which replaces the old Genomic Annotator. --- .../sting/gatk/walkers/annotator/SnpEff.java | 171 ++++++++++ .../utils/codecs/snpEff/SnpEffCodec.java | 202 ++++++++++++ .../utils/codecs/snpEff/SnpEffConstants.java | 107 ++++++ .../utils/codecs/snpEff/SnpEffFeature.java | 306 ++++++++++++++++++ 4 files changed, 786 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java new file mode 100644 index 000000000..e834e6324 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; + +public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { + + // SnpEff field keys: + public static final String GENE_ID_KEY = "GENE_ID"; + public static final String GENE_NAME_KEY = "GENE_NAME"; + public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID"; + public static final String EXON_ID_KEY = "EXON_ID"; + public static final String EXON_RANK_KEY = "EXON_RANK"; + public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE"; + public static final String EFFECT_KEY = "EFFECT"; + public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT"; + public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION"; + public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA"; + public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON"; + public static final String CODON_NUM_KEY = "CODON_NUM"; + public static final String CDS_SIZE_KEY = "CDS_SIZE"; + + private static final String RMD_TRACK_NAME = "SnpEff"; + private static final Logger logger = Logger.getLogger(SnpEff.class); + + public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { + List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); + + sanityCheckSnpEffFeatures(snpEffFeatures); + + SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); + return generateAnnotations(mostSignificantEffect); + } + + private void sanityCheckSnpEffFeatures( List snpEffFeatures ) { + Boolean locusIsNonCodingGene = null; + + for ( Object feature : snpEffFeatures ) { + SnpEffFeature snpEffFeature = (SnpEffFeature)feature; + + if ( locusIsNonCodingGene == null ) { + locusIsNonCodingGene = snpEffFeature.isNonCodingGene(); + } + else if ( ! locusIsNonCodingGene.equals(snpEffFeature.isNonCodingGene()) ) { + logger.warn(String.format("Locus %s:%d is marked as both within and not within a non-coding gene", + snpEffFeature.getChr(), snpEffFeature.getStart())); + return; + } + } + } + + private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { + SnpEffFeature mostSignificantEffect = null; + + for ( Object feature : snpEffFeatures ) { + SnpEffFeature snpEffFeature = (SnpEffFeature)feature; + + if ( mostSignificantEffect == null || + snpEffFeature.getEffectImpact().isHigherImpactThan(mostSignificantEffect.getEffectImpact()) ) { + + mostSignificantEffect = snpEffFeature; + } + } + + return mostSignificantEffect; + } + + private Map generateAnnotations ( SnpEffFeature mostSignificantEffect ) { + Map annotations = new LinkedHashMap(Utils.optimumHashSize(getKeyNames().size())); + + if ( mostSignificantEffect.hasGeneID() ) + annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID()); + if ( mostSignificantEffect.hasGeneName() ) + annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName()); + if ( mostSignificantEffect.hasTranscriptID() ) + annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID()); + if ( mostSignificantEffect.hasExonID() ) + annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID()); + if ( mostSignificantEffect.hasExonRank() ) + annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank())); + if ( mostSignificantEffect.isNonCodingGene() ) + annotations.put(WITHIN_NON_CODING_GENE_KEY, null); + + annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString()); + annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString()); + if ( mostSignificantEffect.hasEffectExtraInformation() ) + annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation()); + + if ( mostSignificantEffect.hasOldAndNewAA() ) + annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA()); + if ( mostSignificantEffect.hasOldAndNewCodon() ) + annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon()); + if ( mostSignificantEffect.hasCodonNum() ) + annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum())); + if ( mostSignificantEffect.hasCdsSize() ) + annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize())); + + return annotations; + } + + public List getKeyNames() { + return Arrays.asList( GENE_ID_KEY, + GENE_NAME_KEY, + TRANSCRIPT_ID_KEY, + EXON_ID_KEY, + EXON_RANK_KEY, + WITHIN_NON_CODING_GENE_KEY, + EFFECT_KEY, + EFFECT_IMPACT_KEY, + EFFECT_EXTRA_INFORMATION_KEY, + OLD_NEW_AA_KEY, + OLD_NEW_CODON_KEY, + CODON_NUM_KEY, + CDS_SIZE_KEY + ); + } + + public List getDescriptions() { + return Arrays.asList( + new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), + new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), + new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), + new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), + new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), + new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), + new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), + new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), + new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), + new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), + new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"), + new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), + new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") + ); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java new file mode 100644 index 000000000..f5d77635a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.TribbleException; +import org.broad.tribble.readers.LineReader; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; + +import java.io.IOException; + +public class SnpEffCodec implements FeatureCodec { + + public static final int EXPECTED_NUMBER_OF_FIELDS = 23; + public static final String FIELD_DELIMITER_PATTERN = "\\t"; + public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]"; + public static final String HEADER_LINE_START = "# "; + public static final String[] HEADER_FIELD_NAMES = { "Chromo", + "Position", + "Reference", + "Change", + "Change type", + "Homozygous", + "Quality", + "Coverage", + "Warnings", + "Gene_ID", + "Gene_name", + "Bio_type", + "Trancript_ID", // yes, this is how it's spelled in the SnpEff output + "Exon_ID", + "Exon_Rank", + "Effect", + "old_AA/new_AA", + "Old_codon/New_codon", + "Codon_Num(CDS)", + "CDS_size", + "Codons around", + "AAs around", + "Custom_interval_ID" + }; + public static final int[] REQUIRED_FIELDS = { 0, 1, 15 }; + public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE"; + + public Feature decodeLoc ( String line ) { + return decode(line); + } + + public Feature decode ( String line ) { + String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1); + + if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) { + throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS + + ") number of fields: found " + tokens.length + " fields.", line); + } + + try { + checkForRequiredFields(tokens, line); + + String contig = tokens[0]; + long position = Long.parseLong(tokens[1]); + + String reference = tokens[2].isEmpty() ? null : tokens[2]; + String change = tokens[3].isEmpty() ? null : tokens[3]; + ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]); + Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]); + Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]); + Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]); + String warnings = tokens[8].isEmpty() ? null : tokens[8]; + String geneID = tokens[9].isEmpty() ? null : tokens[9]; + String geneName = tokens[10].isEmpty() ? null : tokens[10]; + String bioType = tokens[11].isEmpty() ? null : tokens[11]; + String transcriptID = tokens[12].isEmpty() ? null : tokens[12]; + String exonID = tokens[13].isEmpty() ? null : tokens[13]; + Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]); + + boolean isNonCodingGene = isNonCodingGene(tokens[15]); + int effectFieldTokenLimit = isNonCodingGene ? 3 : 2; + String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit); + EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene); + String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene); + + String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16]; + String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17]; + Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]); + Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]); + String codonsAround = tokens[20].isEmpty() ? null : tokens[20]; + String aasAround = tokens[21].isEmpty() ? null : tokens[21]; + String customIntervalID = tokens[22].isEmpty() ? null : tokens[22]; + + return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage, + warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene, + effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize, + codonsAround, aasAround, customIntervalID); + } + catch ( NumberFormatException e ) { + throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line); + } + catch ( IllegalArgumentException e ) { + throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line); + } + } + + private void checkForRequiredFields ( String[] tokens, String line ) { + for ( int requiredFieldIndex : REQUIRED_FIELDS ) { + if ( tokens[requiredFieldIndex].isEmpty() ) { + throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" + + HEADER_FIELD_NAMES[requiredFieldIndex] + "\"", + line); + } + } + } + + private boolean isNonCodingGene ( String effectField ) { + return effectField.startsWith(NON_CODING_GENE_FLAG); + } + + private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) { + String effectName = ""; + + if ( effectFieldTokens.length > 1 && isNonCodingGene ) { + effectName = effectFieldTokens[1].trim(); + } + else { + effectName = effectFieldTokens[0].trim(); + } + + return EffectType.valueOf(effectName); + } + + private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) { + if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) { + return effectFieldTokens[effectFieldTokens.length - 1]; + } + + return null; + } + + public Class getFeatureType() { + return SnpEffFeature.class; + } + + public Object readHeader ( LineReader reader ) { + String headerLine = ""; + + try { + headerLine = reader.readLine(); + } + catch ( IOException e ) { + throw new TribbleException("Unable to read header line from input file."); + } + + validateHeaderLine(headerLine); + return headerLine; + } + + private void validateHeaderLine ( String headerLine ) { + if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) { + throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START); + } + + String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN); + + if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) { + throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" + + EXPECTED_NUMBER_OF_FIELDS + ") of columns."); + } + + for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) { + if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) { + throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" + + HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" + + headerTokens[columnIndex] + "\""); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java new file mode 100644 index 000000000..f226c3523 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +public class SnpEffConstants { + + public enum EffectType { + START_GAINED (EffectImpact.HIGH), + START_LOST (EffectImpact.HIGH), + EXON_DELETED (EffectImpact.HIGH), + FRAME_SHIFT (EffectImpact.HIGH), + STOP_GAINED (EffectImpact.HIGH), + STOP_LOST (EffectImpact.HIGH), + SPLICE_SITE_ACCEPTOR (EffectImpact.HIGH), + SPLICE_SITE_DONOR (EffectImpact.HIGH), + + NON_SYNONYMOUS_CODING (EffectImpact.MODERATE), + UTR_5_DELETED (EffectImpact.MODERATE), + UTR_3_DELETED (EffectImpact.MODERATE), + CODON_INSERTION (EffectImpact.MODERATE), + CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE), + CODON_DELETION (EffectImpact.MODERATE), + CODON_CHANGE_PLUS_CODON_DELETION (EffectImpact.MODERATE), + + NONE (EffectImpact.LOW), + CHROMOSOME (EffectImpact.LOW), + INTERGENIC (EffectImpact.LOW), + UPSTREAM (EffectImpact.LOW), + UTR_5_PRIME (EffectImpact.LOW), + SYNONYMOUS_START (EffectImpact.LOW), + NON_SYNONYMOUS_START (EffectImpact.LOW), + CDS (EffectImpact.LOW), + GENE (EffectImpact.LOW), + TRANSCRIPT (EffectImpact.LOW), + EXON (EffectImpact.LOW), + SYNONYMOUS_CODING (EffectImpact.LOW), + CODON_CHANGE (EffectImpact.LOW), + SYNONYMOUS_STOP (EffectImpact.LOW), + NON_SYNONYMOUS_STOP (EffectImpact.LOW), + INTRON (EffectImpact.LOW), + UTR_3_PRIME (EffectImpact.LOW), + DOWNSTREAM (EffectImpact.LOW), + INTRON_CONSERVED (EffectImpact.LOW), + INTERGENIC_CONSERVED (EffectImpact.LOW), + CUSTOM (EffectImpact.LOW); + + private final EffectImpact impact; + + EffectType ( EffectImpact impact ) { + this.impact = impact; + } + + public EffectImpact getImpact() { + return impact; + } + } + + public enum EffectImpact { + LOW (1), + MODERATE (2), + HIGH (3); + + private final int severityRating; + + EffectImpact ( int severityRating ) { + this.severityRating = severityRating; + } + + public boolean isHigherImpactThan ( EffectImpact other ) { + return this.severityRating > other.severityRating; + } + } + + public enum ChangeType { + SNP, + MNP, + INS, + DEL + } + + public enum Zygosity { + Hom, + Het + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java new file mode 100644 index 000000000..3b9d6d4d6 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +import org.broad.tribble.Feature; + +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; + +public class SnpEffFeature implements Feature { + + private String contig; + private long position; + private String reference; + private String change; + private ChangeType changeType; + private Zygosity zygosity; + private Double quality; + private Long coverage; + private String warnings; + private String geneID; + private String geneName; + private String bioType; + private String transcriptID; + private String exonID; + private Integer exonRank; + private boolean isNonCodingGene; + private EffectType effect; + private String effectExtraInformation; + private String oldAndNewAA; + private String oldAndNewCodon; + private Integer codonNum; + private Integer cdsSize; + private String codonsAround; + private String aasAround; + private String customIntervalID; + + public SnpEffFeature ( String contig, + long position, + String reference, + String change, + ChangeType changeType, + Zygosity zygosity, + Double quality, + Long coverage, + String warnings, + String geneID, + String geneName, + String bioType, + String transcriptID, + String exonID, + Integer exonRank, + boolean isNonCodingGene, + EffectType effect, + String effectExtraInformation, + String oldAndNewAA, + String oldAndNewCodon, + Integer codonNum, + Integer cdsSize, + String codonsAround, + String aasAround, + String customIntervalID ) { + + this.contig = contig; + this.position = position; + this.reference = reference; + this.change = change; + this.changeType = changeType; + this.zygosity = zygosity; + this.quality = quality; + this.coverage = coverage; + this.warnings = warnings; + this.geneID = geneID; + this.geneName = geneName; + this.bioType = bioType; + this.transcriptID = transcriptID; + this.exonID = exonID; + this.exonRank = exonRank; + this.isNonCodingGene = isNonCodingGene; + this.effect = effect; + this.effectExtraInformation = effectExtraInformation; + this.oldAndNewAA = oldAndNewAA; + this.oldAndNewCodon = oldAndNewCodon; + this.codonNum = codonNum; + this.cdsSize = cdsSize; + this.codonsAround = codonsAround; + this.aasAround = aasAround; + this.customIntervalID = customIntervalID; + } + + public String getChr() { + return contig; + } + + public int getStart() { + return (int)position; + } + + public int getEnd() { + return (int)position; + } + + public boolean hasReference() { + return reference != null; + } + + public String getReference() { + return reference; + } + + public boolean hasChange() { + return change != null; + } + + public String getChange() { + return change; + } + + public boolean hasChangeType() { + return changeType != null; + } + + public ChangeType getChangeType() { + return changeType; + } + + public boolean hasZygosity() { + return zygosity != null; + } + + public Zygosity getZygosity() { + return zygosity; + } + + public boolean hasQuality() { + return quality != null; + } + + public Double getQuality() { + return quality; + } + + public boolean hasCoverage() { + return coverage != null; + } + + public Long getCoverage() { + return coverage; + } + + public boolean hasWarnings() { + return warnings != null; + } + + public String getWarnings() { + return warnings; + } + + public boolean hasGeneID() { + return geneID != null; + } + + public String getGeneID() { + return geneID; + } + + public boolean hasGeneName() { + return geneName != null; + } + + public String getGeneName() { + return geneName; + } + + public boolean hasBioType() { + return bioType != null; + } + + public String getBioType() { + return bioType; + } + + public boolean hasTranscriptID() { + return transcriptID != null; + } + + public String getTranscriptID() { + return transcriptID; + } + + public boolean hasExonID() { + return exonID != null; + } + + public String getExonID() { + return exonID; + } + + public boolean hasExonRank() { + return exonRank != null; + } + + public Integer getExonRank() { + return exonRank; + } + + public boolean isNonCodingGene() { + return isNonCodingGene; + } + + public EffectType getEffect() { + return effect; + } + + public EffectImpact getEffectImpact() { + return effect.getImpact(); + } + + public boolean hasEffectExtraInformation() { + return effectExtraInformation != null; + } + + public String getEffectExtraInformation() { + return effectExtraInformation; + } + + public boolean hasOldAndNewAA() { + return oldAndNewAA != null; + } + + public String getOldAndNewAA() { + return oldAndNewAA; + } + + public boolean hasOldAndNewCodon() { + return oldAndNewCodon != null; + } + + public String getOldAndNewCodon() { + return oldAndNewCodon; + } + + public boolean hasCodonNum() { + return codonNum != null; + } + + public Integer getCodonNum() { + return codonNum; + } + + public boolean hasCdsSize() { + return cdsSize != null; + } + + public Integer getCdsSize() { + return cdsSize; + } + + public boolean hasCodonsAround() { + return codonsAround != null; + } + + public String getCodonsAround() { + return codonsAround; + } + + public boolean hadAasAround() { + return aasAround != null; + } + + public String getAasAround() { + return aasAround; + } + + public boolean hasCustomIntervalID() { + return customIntervalID != null; + } + + public String getCustomIntervalID() { + return customIntervalID; + } +} From dd974040af16b9d40dc9781b4926931bba24b304 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 10:46:18 -0400 Subject: [PATCH 139/186] When finding the highest-impact effect at a locus, all effects that are not within a non-coding gene are now considered higher impact than all effects that are within a non-coding gene. --- .../sting/gatk/walkers/annotator/SnpEff.java | 51 ++++++------------- .../utils/codecs/snpEff/SnpEffFeature.java | 11 ++++ 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index e834e6324..c307d4cc0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -56,35 +55,15 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { public static final String CODON_NUM_KEY = "CODON_NUM"; public static final String CDS_SIZE_KEY = "CDS_SIZE"; - private static final String RMD_TRACK_NAME = "SnpEff"; - private static final Logger logger = Logger.getLogger(SnpEff.class); + public static final String RMD_TRACK_NAME = "SnpEff"; public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); - sanityCheckSnpEffFeatures(snpEffFeatures); - SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); return generateAnnotations(mostSignificantEffect); } - private void sanityCheckSnpEffFeatures( List snpEffFeatures ) { - Boolean locusIsNonCodingGene = null; - - for ( Object feature : snpEffFeatures ) { - SnpEffFeature snpEffFeature = (SnpEffFeature)feature; - - if ( locusIsNonCodingGene == null ) { - locusIsNonCodingGene = snpEffFeature.isNonCodingGene(); - } - else if ( ! locusIsNonCodingGene.equals(snpEffFeature.isNonCodingGene()) ) { - logger.warn(String.format("Locus %s:%d is marked as both within and not within a non-coding gene", - snpEffFeature.getChr(), snpEffFeature.getStart())); - return; - } - } - } - private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { SnpEffFeature mostSignificantEffect = null; @@ -92,7 +71,7 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { SnpEffFeature snpEffFeature = (SnpEffFeature)feature; if ( mostSignificantEffect == null || - snpEffFeature.getEffectImpact().isHigherImpactThan(mostSignificantEffect.getEffectImpact()) ) { + snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { mostSignificantEffect = snpEffFeature; } @@ -153,19 +132,19 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { public List getDescriptions() { return Arrays.asList( - new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), - new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), - new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), - new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), - new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), - new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), - new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), - new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), - new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), - new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), - new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"), - new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), - new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") + new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), + new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), + new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), + new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), + new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), + new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), + new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), + new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), + new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), + new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), + new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"), + new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), + new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") ); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java index 3b9d6d4d6..cfa5a91ab 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -112,6 +112,17 @@ public class SnpEffFeature implements Feature { this.customIntervalID = customIntervalID; } + public boolean isHigherImpactThan ( SnpEffFeature other ) { + if ( ! isNonCodingGene() && other.isNonCodingGene() ) { + return true; + } + else if ( isNonCodingGene() && ! other.isNonCodingGene() ) { + return false; + } + + return getEffectImpact().isHigherImpactThan(other.getEffectImpact()); + } + public String getChr() { return contig; } From 197169e47b093e80e357a79f3249e2445c9241b0 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 8 Aug 2011 13:34:04 -0400 Subject: [PATCH 140/186] Submitting patch from Larry Singh to make MathUtils compatible with java 1.7 --- .../org/broadinstitute/sting/utils/MathUtils.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) mode change 100755 => 100644 public/java/src/org/broadinstitute/sting/utils/MathUtils.java diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java old mode 100755 new mode 100644 index 36ed506aa..cbe2948aa --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -147,13 +147,13 @@ public class MathUtils { return Math.log10(sum) + maxValue; } - public static double sum(List values) { + public static double sumDoubles(List values) { double s = 0.0; for ( double v : values) s += v; return s; } - public static int sum(List values) { + public static int sumIntegers(List values) { int s = 0; for ( int v : values) s += v; return s; @@ -428,7 +428,7 @@ public class MathUtils { // for precision purposes, we need to add (or really subtract, since they're // all negative) the largest value; also, we need to convert to normal-space. - double maxValue = MathUtils.arrayMax( array ); + double maxValue = MathUtils.arrayMaxDouble( array ); for (int i = 0; i < array.size(); i++) normalized[i] = Math.pow(10, array.get(i) - maxValue); @@ -507,7 +507,7 @@ public class MathUtils { return minI; } - public static int arrayMax(List array) { + public static int arrayMaxInt(List array) { if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!"); @@ -516,7 +516,7 @@ public class MathUtils { return m; } - public static double arrayMax(List array) { + public static double arrayMaxDouble(List array) { if ( array == null ) throw new IllegalArgumentException("Array cannot be null!"); if ( array.size() == 0 ) throw new IllegalArgumentException("Array size cannot be 0!"); @@ -1274,5 +1274,4 @@ public class MathUtils { public static double log10Factorial (int x) { return log10Gamma(x+1); } - -} \ No newline at end of file +} From e36994e36bd9028601f75dff4939809fe8747c76 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 14:04:46 -0400 Subject: [PATCH 141/186] Refactored a FeatureManager class from RMDTrackBuilder New class handles (vastly more cleanly) the db of tribble codecs, features, and names for use throughout the GATK. Added SelfScopingFeatureCodec interface that allows a FeatureCodec to examine a file and determine if the file can be parsed. This is the first step towards allowing the GATK to dynamically determine the type of a RodBinding. --- .../rmd/ReferenceOrderedDataSource.java | 4 +- .../gatk/refdata/SelfScopingFeatureCodec.java | 48 ++++ .../gatk/refdata/indexer/RMDIndexer.java | 11 +- .../gatk/refdata/tracks/FeatureManager.java | 216 ++++++++++++++++++ .../gatk/refdata/tracks/RMDTrackBuilder.java | 139 +++-------- .../walkers/diffengine/VCFDiffableReader.java | 10 +- .../gatk/GATKExtensionsGenerator.java | 2 +- .../queue/extensions/gatk/RodBindField.java | 70 +++--- .../utils/classloader/PluginManager.java | 2 +- .../utils/codecs/vcf/AbstractVCFCodec.java | 17 +- .../sting/utils/codecs/vcf/VCF3Codec.java | 8 + .../sting/utils/codecs/vcf/VCFCodec.java | 8 +- .../sting/utils/text/ListFileUtils.java | 14 +- .../tracks/FeatureManagerUnitTest.java | 157 +++++++++++++ .../tracks/RMDTrackBuilderUnitTest.java | 8 +- 15 files changed, 540 insertions(+), 174 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java index 6992fc1ff..18679dd77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataSource.java @@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource { } public Class getType() { - return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); + return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass(); } public Class getRecordType() { - return builder.createCodec(getType(),getName()).getFeatureType(); + return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass(); } public File getFile() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java new file mode 100644 index 000000000..de781b839 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import java.io.File; + +/** + * An interface marking that a given Tribble codec can look at the file and determine whether the + * codec specifically parsing the contents of the file. + */ +public interface SelfScopingFeatureCodec { + /** + * This function returns true iff the File potentialInput can be parsed by this + * codec. + * + * The GATK assumes that there's never a situation where two SelfScopingFeaetureCodecs + * return true for the same file. If this occurs the GATK splits out an error. + * + * Note this function must never throw an error. All errors should be trapped + * and false returned. + * + * @param potentialInput the file to test for parsiability with this codec + * @return true if potentialInput can be parsed, false otherwise + */ + public boolean canDecode(final File potentialInput); +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java index 85374757d..029800aea 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java @@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import java.io.File; import java.io.FileOutputStream; -import java.util.Map; /** * a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory @@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram { RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL); // find the types available to the track builders - Map typeMapping = builder.getAvailableTrackNamesAndTypes(); + FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType); // check that the type is valid - if (!typeMapping.containsKey(inputFileType)) - throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet())); + if (descriptor == null) + throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures()); // create the codec - FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType)); + FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser); // check if it's a reference dependent feature codec if (codec instanceof ReferenceDependentFeatureCodec) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java new file mode 100644 index 000000000..26a400071 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.NameAwareCodec; +import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; +import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.io.File; +import java.util.*; + + +/** + * Class for managing Tribble Feature readers available to the GATK. The features + * are dynamically determined via a PluginManager. This class provides convenient + * getter methods for obtaining FeatureDescriptor objects that collect all of the + * useful information about the Tribble Codec, Feature, and name in one place. + * + * @author depristo + */ +public class FeatureManager { + public static class FeatureDescriptor { + final String name; + final FeatureCodec codec; + + public FeatureDescriptor(final String name, final FeatureCodec codec) { + this.name = name; + this.codec = codec; + } + + public String getName() { + return name; + } + public FeatureCodec getCodec() { + return codec; + } + public Class getCodecClass() { return codec.getClass(); } + public Class getFeatureClass() { return codec.getFeatureType(); } + + @Override + public String toString() { + return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName()); + } + } + + private final PluginManager pluginManager; + private final Collection featureDescriptors = new HashSet(); + + + /** + * Construct a FeatureManager + */ + public FeatureManager() { + pluginManager = new PluginManager(FeatureCodec.class, "Codecs", "Codec"); + + for (final String rawName: pluginManager.getPluginsByName().keySet()) { + FeatureCodec codec = pluginManager.createByName(rawName); + String name = rawName.toUpperCase(); + FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec); + featureDescriptors.add(featureDescriptor); + } + } + + /** + * Return the FeatureDescriptor whose getCodecClass().equals(codecClass). + * + * @param codecClass + * @return A FeatureDescriptor or null if none is found + */ + @Requires("codecClass != null") + public FeatureDescriptor getByCodec(Class codecClass) { + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getCodecClass().equals(codecClass) ) + return descriptor; + return null; + } + + /** + * Returns a collection of FeatureDescriptors that emit records of type featureClass + * + * @param featureClass + * @return A FeatureDescriptor or null if none is found + */ + @Requires("featureClass != null") + public Collection getByFeature(Class featureClass) { + Set consistentDescriptors = new HashSet(); + + if (featureClass == null) + throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); + + for ( FeatureDescriptor descriptor : featureDescriptors ) { + if ( featureClass.isAssignableFrom(descriptor.getFeatureClass())) + consistentDescriptors.add(descriptor); + } + return consistentDescriptors; + } + + /** + * Return the FeatureDescriptor with getName().equals(name) + * + * @param name + * @return A FeatureDescriptor or null if none is found + */ + @Requires("name != null") + public FeatureDescriptor getByName(String name) { + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getName().equalsIgnoreCase(name) ) + return descriptor; + return null; + } + + /** + * Returns the FeatureDescriptor that can read the contexts of File file, is one can be determined + * + * @param file + * @return A FeatureDescriptor or null if none is found + */ + @Requires({"file != null", "file.isFile()", "file.canRead()"}) + public FeatureDescriptor getByFiletype(File file) { + List canParse = new ArrayList(); + for ( FeatureDescriptor descriptor : featureDescriptors ) + if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) { + if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) { + canParse.add(descriptor); + } + } + + if ( canParse.size() == 0 ) + return null; + else if ( canParse.size() > 1 ) + throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse); + else + return canParse.get(0); + } + + /** + * Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found + * @param triplet + * @return + */ + @Requires("triplet != null") + public FeatureDescriptor getByTriplet(RMDTriplet triplet) { + return getByName(triplet.getType()); + } + + /** + * @return all of the FeatureDescriptors available to the GATK. Never null + */ + @Ensures("result != null") + public Collection getFeatureDescriptors() { + return Collections.unmodifiableCollection(featureDescriptors); + } + + + /** + * Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load + * @return + */ + @Ensures("result != null") + public String userFriendlyListOfAvailableFeatures() { + List names = new ArrayList(); + for ( final FeatureDescriptor descriptor : featureDescriptors ) + names.add(descriptor.getName()); + return Utils.join(",", names); + } + + /** + * Create a new FeatureCodec of the type described in descriptor, assigning it the + * name (if possible) and providing it the genomeLocParser (where necessary) + * + * @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create + * @param name the name to assign this codec + * @return the feature codec itself + */ + @Requires({"descriptor != null", "name != null", "genomeLocParser != null"}) + @Ensures("result != null") + public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) { + FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass()); + if ( codex instanceof NameAwareCodec ) + ((NameAwareCodec)codex).setName(name); + if ( codex instanceof ReferenceDependentFeatureCodec ) + ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); + return codex; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index f285f1263..d352894e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -65,7 +65,7 @@ import java.util.*; * that gets iterators from the FeatureReader using Tribble. * */ -public class RMDTrackBuilder extends PluginManager { +public class RMDTrackBuilder { // extends PluginManager { /** * our log, which we use to capture anything from this class */ @@ -74,8 +74,6 @@ public class RMDTrackBuilder extends PluginManager { // a constant we use for marking sequence dictionary entries in the Tribble index property list public static final String SequenceDictionaryPropertyPredicate = "DICT:"; - private Map classes = null; - // private sequence dictionary we use to set our tracks with private SAMSequenceDictionary dict = null; @@ -89,6 +87,8 @@ public class RMDTrackBuilder extends PluginManager { */ private ValidationExclusion.TYPE validationExclusionType; + FeatureManager featureManager; + /** * Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally * used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor, @@ -100,66 +100,14 @@ public class RMDTrackBuilder extends PluginManager { public RMDTrackBuilder(SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { - this(); this.dict = dict; - this.genomeLocParser = genomeLocParser; this.validationExclusionType = validationExclusionType; - - classes = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - classes.put(name.toUpperCase(), getPluginsByName().get(name)); - } + this.genomeLocParser = genomeLocParser; + featureManager = new FeatureManager(); } - /** - * Limited constructor that produces a builder capable for validating types, but not building tracks - */ - public RMDTrackBuilder() { - super(FeatureCodec.class, "Codecs", "Codec"); - - classes = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - classes.put(name.toUpperCase(), getPluginsByName().get(name)); - } - } - - - /** @return a list of all available track types we currently have access to create */ - public Map getAvailableTrackNamesAndTypes() { - return Collections.unmodifiableMap(classes); - } - - /** @return a list of all available track record types we currently have access to create */ - public Map getAvailableTrackNamesAndRecordTypes() { - HashMap classToRecord = new HashMap(); - for (String name: this.getPluginsByName().keySet()) { - FeatureCodec codec = this.createByName(name); - classToRecord.put(name.toUpperCase(), codec.getFeatureType()); - } - return classToRecord; - } - - public Class getFeatureCodecClass(RMDTriplet fileDescriptor) { - return getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); - } - - /** - * Returns the FeatureClass (BeagleFeature) produced by an RMDTriplet, or null - * if no such binding is found - * - * @param fileDescriptor - * @return - */ - public Class getFeatureClass(RMDTriplet fileDescriptor) { - return getAvailableTrackNamesAndRecordTypes().get(fileDescriptor.getType().toUpperCase()); - } - - /** - * Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load - * @return - */ - public String getAvailableTribbleFeatureNames() { - return Utils.join(",", getAvailableTrackNamesAndRecordTypes().keySet()); + public FeatureManager getFeatureManager() { + return featureManager; } /** @@ -173,38 +121,33 @@ public class RMDTrackBuilder extends PluginManager { String name = fileDescriptor.getName(); File inputFile = new File(fileDescriptor.getFile()); - Class featureCodecClass = getFeatureCodecClass(fileDescriptor); - if (featureCodecClass == null) + FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor); + if (descriptor == null) throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); // return a feature reader track Pair pair; if (inputFile.getAbsolutePath().endsWith(".gz")) - pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile); + pair = createTabixIndexedFeatureSource(descriptor, name, inputFile); else - pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType()); + pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType()); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); - return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name)); + return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name)); } /** * Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream. - * @param targetClass Type of Tribble class to build. + * @param codecClass Type of Tribble codec class to build. * @param inputFile Input file type to use. * @return An RMDTrack, suitable for accessing reference metadata. */ - public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) { - // TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics. - String typeName = null; - for(Map.Entry trackType: getAvailableTrackNamesAndTypes().entrySet()) { - if(trackType.getValue().equals(targetClass)) - typeName = trackType.getKey(); - } + public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) { + final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass); - if(typeName == null) - throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName()); + if (descriptor == null) + throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName()); - return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); + return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); } /** @@ -212,16 +155,16 @@ public class RMDTrackBuilder extends PluginManager { * reader of the appropriate type will figure out what the right index type is, and determine if it * exists. * - * @param targetClass the codec class type + * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create * @param name the name of the track * @param inputFile the file to load * @return a feature reader implementation */ - private Pair createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) { + private Pair createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) { // we might not know the index type, try loading with the default reader constructor logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); try { - return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null); + return new Pair(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null); } catch (TribbleException e) { throw new UserException(e.getMessage(), e); } @@ -229,28 +172,26 @@ public class RMDTrackBuilder extends PluginManager { /** * add a name to the codec, if it takes one - * @param targetClass the class to create a codec for + * @param descriptor the class to create a codec for * @param name the name to assign this codec * @return the feature codec itself */ - public FeatureCodec createCodec(Class targetClass, String name) { - FeatureCodec codex = this.createByType(targetClass); - if ( codex instanceof NameAwareCodec ) - ((NameAwareCodec)codex).setName(name); - if(codex instanceof ReferenceDependentFeatureCodec) - ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); - return codex; + private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) { + return featureManager.createCodec(descriptor, name, genomeLocParser); } /** * create a feature source object given: - * @param targetClass the target class + * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create * @param name the name of the codec * @param inputFile the tribble file to parse * @param storageType How the RMD is streamed into the input file. * @return the input file as a FeatureReader */ - private Pair getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) { + private Pair getFeatureSource(FeatureManager.FeatureDescriptor descriptor, + String name, + File inputFile, + RMDStorageType storageType) { // Feature source and sequence dictionary to use as the ultimate reference FeatureSource featureSource = null; SAMSequenceDictionary sequenceDictionary = null; @@ -260,7 +201,7 @@ public class RMDTrackBuilder extends PluginManager { if(canBeIndexed) { try { - Index index = loadIndex(inputFile, createCodec(targetClass, name)); + Index index = loadIndex(inputFile, createCodec(descriptor, name)); try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } catch (ReviewedStingException e) { } @@ -273,7 +214,7 @@ public class RMDTrackBuilder extends PluginManager { sequenceDictionary = getSequenceDictionaryFromProperties(index); } - featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)); + featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name)); } catch (TribbleException e) { throw new UserException(e.getMessage()); @@ -283,7 +224,7 @@ public class RMDTrackBuilder extends PluginManager { } } else { - featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false); + featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false); } return new Pair(featureSource,sequenceDictionary); @@ -418,22 +359,6 @@ public class RMDTrackBuilder extends PluginManager { return idx; } - /** - * Returns a collection of track names that match the record type. - * @param trackRecordType the record type specified in the @RMD annotation - * @return a collection of available track record type names that match the record type - */ - public Collection getTrackRecordTypeNames(Class trackRecordType) { - Set names = new TreeSet(); - if (trackRecordType == null) - throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); - - for (Map.Entry availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) { - if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue())) - names.add(availableTrackRecordType.getKey()); - } - return names; - } // --------------------------------------------------------------------------------------------------------- // static functions to work with the sequence dictionaries of indexes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 77a992ce0..a447d17af 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -129,14 +129,6 @@ public class VCFDiffableReader implements DiffableReader { @Override public boolean canRead(File file) { - try { - final String VCF4_HEADER = "##fileformat=VCFv4"; - char[] buff = new char[VCF4_HEADER.length()]; - new FileReader(file).read(buff, 0, VCF4_HEADER.length()); - String firstLine = new String(buff); - return firstLine.startsWith(VCF4_HEADER); - } catch ( IOException e ) { - return false; - } + return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER); } } diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index e5974e165..9578eda84 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -157,7 +157,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram { List argumentFields = new ArrayList(); argumentFields.addAll(ArgumentDefinitionField.getArgumentFields(parser,walkerType)); - argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder)); + //argumentFields.addAll(RodBindField.getRodArguments(walkerType, trackBuilder)); argumentFields.addAll(ReadFilterField.getFilterArguments(parser,walkerType)); String constructor = String.format("analysisName = \"%1$s\"%nanalysis_type = \"%1$s\"%n", walkerName); diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java index 02d2fd0a8..baf083575 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/RodBindField.java @@ -91,39 +91,39 @@ public class RodBindField extends ArgumentField { } return exclusiveOf.toString(); } - - public static List getRodArguments(Class walkerClass, RMDTrackBuilder trackBuilder) { - List argumentFields = new ArrayList(); - - List requires = WalkerManager.getRequiredMetaData(walkerClass); - List allows = WalkerManager.getAllowsMetaData(walkerClass); - - for (RMD required: requires) { - List fields = new ArrayList(); - String trackName = required.name(); - if ("*".equals(trackName)) { - // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers - //fields.add(new RodBindArgumentField(argumentDefinition, true)); - } else { - for (String typeName: trackBuilder.getTrackRecordTypeNames(required.type())) - fields.add(new RodBindField(trackName, typeName, fields, true)); - } - argumentFields.addAll(fields); - } - - for (RMD allowed: allows) { - List fields = new ArrayList(); - String trackName = allowed.name(); - if ("*".equals(trackName)) { - // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers - //fields.add(new RodBindArgumentField(argumentDefinition, false)); - } else { - for (String typeName: trackBuilder.getTrackRecordTypeNames(allowed.type())) - fields.add(new RodBindField(trackName, typeName, fields, true)); - } - argumentFields.addAll(fields); - } - - return argumentFields; - } +// +// public static List getRodArguments(Class walkerClass, RMDTrackBuilder trackBuilder) { +// List argumentFields = new ArrayList(); +// +// List requires = WalkerManager.getRequiredMetaData(walkerClass); +// List allows = WalkerManager.getAllowsMetaData(walkerClass); +// +// for (RMD required: requires) { +// List fields = new ArrayList(); +// String trackName = required.name(); +// if ("*".equals(trackName)) { +// // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers +// //fields.add(new RodBindArgumentField(argumentDefinition, true)); +// } else { +// for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(required.type())) +// fields.add(new RodBindField(trackName, typeName, fields, true)); +// } +// argumentFields.addAll(fields); +// } +// +// for (RMD allowed: allows) { +// List fields = new ArrayList(); +// String trackName = allowed.name(); +// if ("*".equals(trackName)) { +// // TODO: Add the field triplet for name=* after @Allows and @Requires are fixed on walkers +// //fields.add(new RodBindArgumentField(argumentDefinition, false)); +// } else { +// for (String typeName: trackBuilder.getFeatureManager().getTrackRecordTypeNames(allowed.type())) +// fields.add(new RodBindField(trackName, typeName, fields, true)); +// } +// argumentFields.addAll(fields); +// } +// +// return argumentFields; +// } } diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index 8d37ff573..04cbef0c3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -172,7 +172,7 @@ public class PluginManager { } } - protected Map> getPluginsByName() { + public Map> getPluginsByName() { return Collections.unmodifiableMap(pluginsByName); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 9788f8654..cb505c717 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -7,16 +7,20 @@ import org.broad.tribble.NameAwareCodec; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; import java.util.*; -public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser { +public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec { protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -616,4 +620,15 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, return inputVC; } + + public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { + try { + char[] buff = new char[MAGIC_HEADER_LINE.length()]; + new FileReader(potentialInput).read(buff, 0, MAGIC_HEADER_LINE.length()); + String firstLine = new String(buff); + return firstLine.startsWith(MAGIC_HEADER_LINE); + } catch ( IOException e ) { + return false; + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index c29f2ba8b..ea16595bb 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.util.*; @@ -16,6 +18,8 @@ import java.util.*; * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. */ public class VCF3Codec extends AbstractVCFCodec { + public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3"; + /** * @param reader the line reader to take header lines from @@ -178,4 +182,8 @@ public class VCF3Codec extends AbstractVCFCodec { return genotypes; } + @Override + public boolean canDecode(final File potentialInput) { + return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 05fff5d9e..55a0eb3f9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.io.File; +import java.io.FileReader; import java.io.IOException; import java.util.*; @@ -16,6 +18,7 @@ import java.util.*; * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. */ public class VCFCodec extends AbstractVCFCodec { + public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; /** * @param reader the line reader to take header lines from @@ -184,5 +187,8 @@ public class VCFCodec extends AbstractVCFCodec { return genotypes; } - + @Override + public boolean canDecode(final File potentialInput) { + return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 82a8f86d9..79271464b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -28,7 +28,7 @@ import org.broadinstitute.sting.commandline.ParsingEngine; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -134,7 +134,7 @@ public class ListFileUtils { public static Collection unpackRODBindings(final Collection RODBindings, final ParsingEngine parser) { // todo -- this is a strange home for this code. Move into ROD system Collection rodBindings = new ArrayList(); - RMDTrackBuilder builderForValidation = new RMDTrackBuilder(); + FeatureManager builderForValidation = new FeatureManager(); for (RodBinding rodBinding: RODBindings) { String argValue = rodBinding.getSource(); @@ -153,15 +153,15 @@ public class ListFileUtils { RMDTriplet triplet = new RMDTriplet(name,type,fileName,storageType,rodBinding.getTags()); // validate triplet type - Class typeFromTribble = builderForValidation.getFeatureClass(triplet); - if ( typeFromTribble == null ) + FeatureManager.FeatureDescriptor descriptor = builderForValidation.getByTriplet(triplet); + if ( descriptor == null ) throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(), String.format("Field %s had provided type %s but there's no such Tribble type. Available types are %s", - rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.getAvailableTribbleFeatureNames())); - if ( ! rodBinding.getType().isAssignableFrom(typeFromTribble) ) + rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures())); + if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) ) throw new UserException.BadArgumentValue(rodBinding.getName(), String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s", - rodBinding.getName(), rodBinding.getType(), typeFromTribble)); + rodBinding.getName(), rodBinding.getType(), descriptor.getName())); rodBindings.add(triplet); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java new file mode 100644 index 000000000..5d662ffed --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata.tracks; + + +import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.features.table.BedTableCodec; +import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.*; +import java.util.*; + + +/** + * @author depristo + * + * UnitTests for RMD FeatureManager + */ +public class FeatureManagerUnitTest extends BaseTest { + private static final File RANDOM_FILE = new File(validationDataLocation + "exampleGATKReport.eval"); + private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf"); + private static final File VCF4_FILE = new File(validationDataLocation + "vcf4.1.example.vcf"); + + private FeatureManager manager; + private GenomeLocParser genomeLocParser; + + @BeforeMethod + public void setup() { + File referenceFile = new File(b36KGReference); + try { + IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile); + genomeLocParser = new GenomeLocParser(seq); + manager = new FeatureManager(); + } + catch(FileNotFoundException ex) { + throw new UserException.CouldNotReadInputFile(referenceFile,ex); + } + } + + @Test + public void testManagerCreation() { + Assert.assertTrue(manager.getFeatureDescriptors().size() > 0); + } + + private class FMTest extends BaseTest.TestDataProvider { + public Class codec; + public Class feature; + public String name; + public File associatedFile; + + private FMTest(final Class feature, final Class codec, final String name, final File file) { + super(FMTest.class); + this.codec = codec; + this.feature = feature; + this.name = name; + this.associatedFile = file; + } + + public void assertExpected(FeatureManager.FeatureDescriptor featureDescriptor) { + Assert.assertEquals(featureDescriptor.getCodecClass(), codec); + Assert.assertEquals(featureDescriptor.getFeatureClass(), feature); + Assert.assertEquals(featureDescriptor.getName().toLowerCase(), name.toLowerCase()); + } + + public String toString() { + return String.format("FMTest name=%s codec=%s feature=%s file=%s", name, codec, feature, associatedFile); + } + } + + @DataProvider(name = "tests") + public Object[][] createTests() { + new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE); + new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE); + new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); + return FMTest.getTests(FMTest.class); + } + + @Test(dataProvider = "tests") + public void testGetByFile(FMTest params) { + if ( params.associatedFile != null ) { + FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(params.associatedFile); + Assert.assertNotNull(byFile, "Couldn't find any type associated with file " + params.associatedFile); + params.assertExpected(byFile); + } + } + + @Test + public void testGetByFileNoMatch() { + FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(RANDOM_FILE); + Assert.assertNull(byFile, "Found type " + byFile + " associated with RANDOM, non-Tribble file " + RANDOM_FILE); + } + + @Test(dataProvider = "tests") + public void testGetters(FMTest params) { + params.assertExpected(manager.getByCodec(params.codec)); + params.assertExpected(manager.getByName(params.name)); + params.assertExpected(manager.getByName(params.name.toLowerCase())); + params.assertExpected(manager.getByName(params.name.toUpperCase())); + + Collection descriptors = manager.getByFeature(params.feature); + Assert.assertTrue(descriptors.size() > 0, "Look up by FeatureClass failed"); + } + + @Test + public void testUserFriendlyList() { + Assert.assertTrue(manager.userFriendlyListOfAvailableFeatures().length() > 0, "Expected at least one codec to be listed"); + Assert.assertTrue(manager.userFriendlyListOfAvailableFeatures().split(",").length > 0, "Expected at least two codecs, but only saw one"); + } + + @Test + public void testCodecCreation() { + FeatureManager.FeatureDescriptor descriptor = manager.getByName("vcf"); + Assert.assertNotNull(descriptor, "Couldn't find VCF feature descriptor!"); + + FeatureCodec c = manager.createCodec(descriptor, "foo", genomeLocParser); + Assert.assertNotNull(c, "Couldn't create codec"); + Assert.assertEquals(c.getClass(), descriptor.getCodecClass()); + Assert.assertEquals(c.getFeatureType(), descriptor.getFeatureClass()); + } + +} + diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java index 70d2e7a85..ae218e898 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilderUnitTest.java @@ -1,5 +1,6 @@ /* - * Copyright (c) 2010. The Broad Institute + * Copyright (c) 2011, The Broad Institute + * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -11,7 +12,7 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT @@ -74,8 +75,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest { @Test public void testBuilder() { - Map classes = builder.getAvailableTrackNamesAndTypes(); - Assert.assertTrue(classes.size() > 0); + Assert.assertTrue(builder.getFeatureManager().getFeatureDescriptors().size() > 0); } @Test From 0810c423093e0562f9fe5a7101902e77beb78c46 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 14:45:46 -0400 Subject: [PATCH 143/186] GATK now does dynamic type determination for VCF files Added UnitTests covering all of the cases. --- .../commandline/ArgumentTypeDescriptor.java | 40 +++++++-- .../commandline/ParsingEngineUnitTest.java | 83 +++++++++++++++++++ 2 files changed, 115 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 622576747..d341b2cde 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.commandline; import org.apache.log4j.Logger; import org.broad.tribble.Feature; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplexer; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -313,19 +314,42 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { String value = getArgumentValue( defaultDefinition, matches ); try { String name = defaultDefinition.fullName; - String tribbleType; + String tribbleType = null; Tags tags = getArgumentTags(matches); // must have one or two tag values here if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style name = tags.getPositionalTags().get(0); tribbleType = tags.getPositionalTags().get(1); - } else if ( tags.getPositionalTags().size() == 1 ) { // -X:type style - tribbleType = tags.getPositionalTags().get(0); - } else - throw new UserException.CommandLineException( - String.format("Unexpected number of positional tags for argument %s : %s. " + - "Rod bindings only suport -X:type and -X:name,type argument styles", - value, source.field.getName())); + } else { + if ( tags.getPositionalTags().size() == 1 ) { + // -X:type style is a type when we cannot determine the type dynamically + tribbleType = tags.getPositionalTags().get(0); + } + + // try to determine the file type dynamically + FeatureManager manager = new FeatureManager(); + File file = new File(value); + if ( file.canRead() && file.isFile() ) { + FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); + if ( featureDescriptor != null ) { + tribbleType = featureDescriptor.getName(); + logger.warn("Dynamically determined of " + file + " to be " + tribbleType); + + if ( tags.getPositionalTags().size() == 1 ) { + // -X:type style is a name when we can determine the type dynamically + name = tags.getPositionalTags().get(0); + } + } + } + + // now, if we haven't found a type + if ( tribbleType == null ) + throw new UserException.CommandLineException( + String.format("Unexpected number of positional tags for argument %s : %s. " + + "Rod bindings only suport -X:type and -X:name,type argument styles", + value, source.field.getName())); + } + Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 63e1a59bd..366401ad6 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.commandline; import org.broad.tribble.Feature; +import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; @@ -837,5 +838,87 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.bindings.get(1).getName(), "foo2", "Name isn't set properly"); } + private final static String HISEQ_VCF = testDir + "HiSeq.10000.vcf"; + private final static String TRANCHES_FILE = testDir + "tranches.6.txt"; + @Test + public void variantContextBindingTestDynamicTyping1() { + final String[] commandLine = new String[] {"-V", HISEQ_VCF}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "binding", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 0, "Tags aren't correctly set"); + } + + @Test + public void variantContextBindingTestDynamicTypingNameAsSingleArgument() { + final String[] commandLine = new String[] {"-V:name", HISEQ_VCF}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "name", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); + } + + @Test() + public void variantContextBindingTestDynamicTypingTwoTagsPassing() { + final String[] commandLine = new String[] {"-V:name,vcf", HISEQ_VCF}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + + Assert.assertEquals(argProvider.binding.getName(), "name", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 2, "Tags aren't correctly set"); + } + + @Test() + public void variantContextBindingTestDynamicTypingTwoTagsCausingTypeFailure() { + final String[] commandLine = new String[] {"-V:name,beagle", HISEQ_VCF}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject(argProvider); + + Assert.assertEquals(argProvider.binding.getName(), "name", "Name isn't set properly"); + Assert.assertEquals(argProvider.binding.getSource(), HISEQ_VCF, "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTribbleType(), "beagle", "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 2, "Tags aren't correctly set"); + } + + @Test(expectedExceptions = UserException.class) + public void variantContextBindingTestDynamicTypingUnknownTribbleType() { + final String[] commandLine = new String[] {"-V", TRANCHES_FILE}; + + parsingEngine.addArgumentSource( VariantContextRodBindingArgProvider.class ); + parsingEngine.parse( commandLine ); + parsingEngine.validate(); + + VariantContextRodBindingArgProvider argProvider = new VariantContextRodBindingArgProvider(); + parsingEngine.loadArgumentsIntoObject( argProvider ); + } } From ba7353c561ca8cb423efc7b6e01508d6d19f408d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 15:04:38 -0400 Subject: [PATCH 144/186] Updated IntegrationTests to use the new type free format for VCF files --- .../commandline/ArgumentTypeDescriptor.java | 2 +- .../VariantAnnotatorIntegrationTest.java | 4 +- ...astaAlternateReferenceIntegrationTest.java | 2 +- .../UnifiedGenotyperPerformanceTest.java | 2 +- ...RealignerTargetCreatorIntegrationTest.java | 2 +- ...ergeAndMatchHaplotypesIntegrationTest.java | 4 +- .../phasing/MergeMNPsIntegrationTest.java | 2 +- ...gatingAlternateAllelesIntegrationTest.java | 2 +- .../PhaseByTransmissionIntegrationTest.java | 2 +- .../ReadBackedPhasingIntegrationTest.java | 2 +- .../RecalibrationWalkersPerformanceTest.java | 4 +- .../VariantEvalIntegrationTest.java | 68 +++++++++---------- ...ntRecalibrationWalkersIntegrationTest.java | 10 +-- .../CombineVariantsIntegrationTest.java | 16 ++--- 14 files changed, 61 insertions(+), 61 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index d341b2cde..ebed68022 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -333,7 +333,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); if ( featureDescriptor != null ) { tribbleType = featureDescriptor.getName(); - logger.warn("Dynamically determined of " + file + " to be " + tribbleType); + logger.warn("Dynamically determined type of " + file + " to be " + tribbleType); if ( tags.getPositionalTags().size() == 1 ) { // -X:type style is a name when we can determine the type dynamically diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index fc50f67f2..09da337ce 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -102,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:compH3 " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -110,7 +110,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, + baseTestString() + " -B:foo " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index be2f3cdaa..cedee826c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -24,7 +24,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaReference", spec1b); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); executeTest("testFastaAlternateReferenceIndels", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index d271d78b1..132e6aeb7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -B:dbsnp,VCF " + b36dbSNP129 + + " -B:dbsnp " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index aabf01415..8fa15700d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -23,7 +23,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test dbsnp", spec2); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s", + "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s", 1, Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96")); executeTest("test rods only", spec3); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java index 21435dd7d..d27c63759 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -16,8 +16,8 @@ public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { buildCommandLine( "-T MergeAndMatchHaplotypes", "-R " + b37KGReference, - "-B:pbt,VCF " + fundamentalTestPBTVCF, - "-B:rbp,VCF " + fundamentalTestRBPVCF, + "-B:pbt " + fundamentalTestPBTVCF, + "-B:rbp " + fundamentalTestRBPVCF, "-o %s" ), 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java index c88eac149..ef6fd0d7f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java @@ -10,7 +10,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest { public static String baseTestString(String reference, String VCF, int maxDistMNP) { return "-T MergeMNPs" + " -R " + reference + - " -B:variant,VCF " + validationDataLocation + VCF + + " -B:variant " + validationDataLocation + VCF + " --maxGenomicDistanceForMNP " + maxDistMNP + " -o %s" + " -NO_HEADER"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java index f855c1dd3..b9c291b61 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java @@ -10,7 +10,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest public static String baseTestString(String reference, String VCF, int maxDist) { return "-T MergeSegregatingAlternateAlleles" + " -R " + reference + - " -B:variant,VCF " + validationDataLocation + VCF + + " -B:variant " + validationDataLocation + VCF + " --maxGenomicDistance " + maxDist + " -o %s" + " -NO_HEADER"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 59750e18f..4a205c85f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -16,7 +16,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-T PhaseByTransmission", "-NO_HEADER", "-R " + b37KGReference, - "-B:variant,VCF " + fundamentalTestVCF, + "-B:variant " + fundamentalTestVCF, "-f NA12892+NA12891=NA12878", "-o %s" ), diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 1bf3e579f..2b3122e77 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { return "-T ReadBackedPhasing" + " -R " + reference + " -I " + validationDataLocation + reads + - " -B:variant,VCF " + validationDataLocation + VCF + + " -B:variant " + validationDataLocation + VCF + " --cacheWindowSize " + cacheWindowSize + " --maxPhaseSites " + maxPhaseSites + " --phaseQualityThresh " + phaseQualityThresh + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index 43ea401f7..45d104862 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -B:dbsnp " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); @@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + " -standard" + " -OQ" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -B:dbsnp " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 8fa5f0c29..6bbaa266d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -20,8 +20,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -40,8 +40,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -61,8 +61,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -83,8 +83,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -104,8 +104,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -125,8 +125,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -146,8 +146,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -167,8 +167,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -190,8 +190,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -215,7 +215,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:eval,VCF " + fundamentalTestVCF, + "-B:eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -232,7 +232,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testSelect1() { String extraArgs = "-L 1:1-10,000,000"; String tests = cmdRoot + - " -B:dbsnp,VCF " + b36dbSNP129 + + " -B:dbsnp " + b36dbSNP129 + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", @@ -280,8 +280,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp,VCF " + b37dbSNP132 + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:dbsnp " + b37dbSNP132 + + " -B:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0897dfba2f4a245faddce38000555cce")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); @@ -291,9 +291,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testMultipleEvalTracksWithoutGenotypes() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp,VCF " + b37dbSNP132 + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " -B:dbsnp " + b37dbSNP132 + + " -B:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ead3602e14ec2944b5d9e4dacc08c819")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); @@ -305,9 +305,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval" + " -R " + b37KGReference + - " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + - " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + - " -B:dbsnp,VCF " + dbsnp + + " -B:comp " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " -B:eval " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " -B:dbsnp " + dbsnp + " -L 20:10000000-10100000" + " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; @@ -324,8 +324,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", "-sn HG00625", @@ -342,8 +342,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestSNPsOneSampleVCF, "-noEV", "-EV CompOverlap", "-noST", @@ -363,8 +363,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-B:dbsnp " + b37dbSNP132, + "-B:eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CountVariants", "-noST", diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 3ac7e3785..f6c858135 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -41,11 +41,11 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b37KGReference + - " -B:dbsnp,VCF,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + - " -B:hapmap,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + - " -B:omni,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + + " -B:dbsnp,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + + " -B:hapmap,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + + " -B:omni,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + " -T VariantRecalibrator" + - " -B:input,VCF " + params.inVCF + + " -B:input " + params.inVCF + " -L 20:1,000,000-40,000,000" + " -an QD -an HaplotypeScore -an HRun" + " -percentBad 0.07" + @@ -64,7 +64,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -T ApplyRecalibration" + " -L 20:12,000,000-30,000,000" + " -NO_HEADER" + - " -B:input,VCF " + params.inVCF + + " -B:input " + params.inVCF + " -o %s" + " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) + " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null), diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index d27ab34a0..6dfbc1723 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -63,8 +63,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { String file2 = "hapmap_3.3.b37.sites.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference - + " -L 1:1-10,000,000 -V:omni,VCF " + validationDataLocation + file1 - + " -V:hm3,VCF " + validationDataLocation + file2 + args, + + " -L 1:1-10,000,000 -V:omni " + validationDataLocation + file1 + + " -V:hm3 " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); @@ -91,10 +91,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -V:NA19240_BGI,VCF "+validationDataLocation+"NA19240.BGI.RG.vcf" + - " -V:NA19240_ILLUMINA,VCF "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + - " -V:NA19240_WUGSC,VCF "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + - " -V:denovoInfo,VCF "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + + baseTestString(" -V:NA19240_BGI "+validationDataLocation+"NA19240.BGI.RG.vcf" + + " -V:NA19240_ILLUMINA "+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + + " -V:NA19240_WUGSC "+validationDataLocation+"NA19240.WUGSC.RG.vcf" + + " -V:denovoInfo "+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + " -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED" + " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + @@ -110,8 +110,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { String file2 = "combine.2.vcf"; WalkerTestSpec spec = new WalkerTestSpec( "-T CombineVariants -NO_HEADER -o %s -R " + b37KGReference - + " -V:one,VCF " + validationDataLocation + file1 - + " -V:two,VCF " + validationDataLocation + file2 + args, + + " -V:one " + validationDataLocation + file1 + + " -V:two " + validationDataLocation + file2 + args, 1, Arrays.asList(md5)); executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); From 4f8fc0f2f1de28e2abe46a678c5651b591cea09d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 15:05:47 -0400 Subject: [PATCH 145/186] VCF3 now dynamically determined --- .../RecalibrationWalkersIntegrationTest.java | 4 ++-- .../walkers/varianteval/VariantEvalIntegrationTest.java | 8 ++++---- .../variantcontext/VariantContextIntegrationTest.java | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 74f803ac6..8334c99ec 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -236,7 +236,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF3 " + validationDataLocation + "vcfexample3.vcf" + + " -B:dbsnp " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + " -L 1:10,000,000-10,200,000" + @@ -284,7 +284,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + + " -B:anyNameABCD " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + " -B:dbsnp,vcf " + b36dbSNP129 + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 6bbaa266d..401fecb44 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -233,8 +233,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000"; String tests = cmdRoot + " -B:dbsnp " + b36dbSNP129 + - " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + " -B:eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -B:comp_genotypes " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", 1, Arrays.asList("14054badcd89b24c2375e1d09918f681")); executeTestParallel("testSelect1", spec); @@ -244,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testVEGenotypeConcordance() { String vcfFile = "GenotypeConcordanceEval.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval " + validationDataLocation + vcfFile + " -B:comp " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, Arrays.asList("96f27163f16bb945f19c6623cd6db34e")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); @@ -252,7 +252,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); executeTestParallel("testCompVsEvalAC",spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 7cdb6af95..9655b514a 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantContextIntegrationTest extends WalkerTest { private static String root = cmdRoot + " -L 1:1-1,000,000 -B:dbsnp,vcf " + b36dbSNP129 + - " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; + " -B:vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; private static final class VCITTest extends TestDataProvider { String args, md5; @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { public void testToVCF() { // this really just tests that we are seeing the same number of objects over all of chr1 - WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", + WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", 2, // just one output file Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); From d7813db217fc5d0728d7654e985e9ee04669ed5f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 8 Aug 2011 16:25:35 -0400 Subject: [PATCH 146/186] Combine Variants was actually outputting invalid VCFs in cases where it was combining Variant Contexts with different alternate alleles: if any of the genotypes had PLs they were no longer valid/correct. Added a check for such cases (the combined VC has more alleles than an original VC) and strip out the PLs when triggered; added integration test to cover it. I also added the check to Select Variants, although it currently doesn't remove unused alleles so it should never trigger. Is there any reason not to strip out unused alleles after a select? --- .../walkers/variantutils/SelectVariants.java | 4 ++++ .../sting/utils/variantcontext/Genotype.java | 7 +++++++ .../variantcontext/VariantContextUtils.java | 18 ++++++++++++++++++ .../CombineVariantsIntegrationTest.java | 10 ++++++++++ .../SelectVariantsIntegrationTest.java | 12 ++++++++++++ 5 files changed, 51 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 41374a349..b2bce2e59 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -544,6 +544,10 @@ public class SelectVariants extends RodWalker { VariantContext sub = vc.subContextFromGenotypes(genotypes, vc.getAlleles()); + // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) + if ( vc.getAlleles().size() != sub.getAlleles().size() ) + sub = VariantContext.modifyGenotypes(sub, VariantContextUtils.stripPLs(vc.getGenotypes())); + HashMap attributes = new HashMap(sub.getAttributes()); int depth = 0; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 0b5976c3c..fdf3d97db 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -57,6 +57,13 @@ public class Genotype { return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); } + public static Genotype removePLs(Genotype g) { + Map attrs = new HashMap(g.getAttributes()); + attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); + attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased()); + } + public static Genotype modifyAlleles(Genotype g, List alleles) { return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 7d10749ee..fa039b42e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -588,6 +588,14 @@ public class VariantContextUtils { } } + // if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate) + for ( VariantContext vc : VCs ) { + if ( vc.alleles.size() != alleles.size() ) { + genotypes = stripPLs(genotypes); + break; + } + } + // take the VC with the maxAC and pull the attributes into a modifiable map if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) { attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes()); @@ -633,6 +641,16 @@ public class VariantContextUtils { return merged; } + public static Map stripPLs(Map genotypes) { + Map newGs = new HashMap(genotypes.size()); + + for ( Map.Entry g : genotypes.entrySet() ) { + newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); + } + + return newGs; + } + public static Map> separateVariantContextsByType(Collection VCs) { HashMap> mappedVCs = new HashMap>(); for ( VariantContext vc : VCs ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 9b152bc71..9d5add172 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -70,6 +70,14 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } + public void combinePLs(String file1, String file2, String md5) { + WalkerTestSpec spec = new WalkerTestSpec( + "-T CombineVariants -NO_HEADER -o %s -R " + b36KGReference + " -priority v1,v2 -B:v1,VCF " + validationDataLocation + file1 + " -B:v2,VCF " + validationDataLocation + file2, + 1, + Arrays.asList(md5)); + executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); + } + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } @@ -78,6 +86,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); } @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); } + @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); } + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index b5f41542e..564400f75 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -63,4 +63,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testConcordance--" + testFile, spec); } + @Test(enabled=false) + public void testRemovePLs() { + String testFile = validationDataLocation + "combine.3.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -sn NA12892 -B:variant,VCF " + testFile + " -o %s -NO_HEADER", + 1, + Arrays.asList("") + ); + + executeTest("testWithPLs--" + testFile, spec); + } } From 5e288136e02a0816155f83472e391a7ef9cbbef5 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 16:51:43 -0400 Subject: [PATCH 147/186] Added unit tests for the SnpEff codec, and made minor adjustments to the codec itself. --- .../utils/codecs/snpEff/SnpEffCodec.java | 9 +- .../utils/codecs/snpEff/SnpEffFeature.java | 63 +++++ .../codecs/snpEff/SnpEffCodecUnitTest.java | 259 ++++++++++++++++++ 3 files changed, 330 insertions(+), 1 deletion(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index f5d77635a..dfe1f5f1a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -80,6 +80,7 @@ public class SnpEffCodec implements FeatureCodec { } try { + trimAllFields(tokens); checkForRequiredFields(tokens, line); String contig = tokens[0]; @@ -126,6 +127,12 @@ public class SnpEffCodec implements FeatureCodec { } } + private void trimAllFields ( String[] tokens ) { + for ( int i = 0; i < tokens.length; i++ ) { + tokens[i] = tokens[i].trim(); + } + } + private void checkForRequiredFields ( String[] tokens, String line ) { for ( int requiredFieldIndex : REQUIRED_FIELDS ) { if ( tokens[requiredFieldIndex].isEmpty() ) { @@ -155,7 +162,7 @@ public class SnpEffCodec implements FeatureCodec { private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) { if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) { - return effectFieldTokens[effectFieldTokens.length - 1]; + return effectFieldTokens[effectFieldTokens.length - 1].trim(); } return null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java index cfa5a91ab..4a68d7cf1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -314,4 +314,67 @@ public class SnpEffFeature implements Feature { public String getCustomIntervalID() { return customIntervalID; } + + public boolean equals ( Object o ) { + if ( o == null || ! (o instanceof SnpEffFeature) ) { + return false; + } + + SnpEffFeature other = (SnpEffFeature)o; + + return contig.equals(other.contig) && + position == other.position && + (reference == null ? other.reference == null : reference.equals(other.reference)) && + (change == null ? other.change == null : change.equals(other.change)) && + changeType == other.changeType && + zygosity == other.zygosity && + (quality == null ? other.quality == null : quality.equals(other.quality)) && + (coverage == null ? other.coverage == null : coverage.equals(other.coverage)) && + (warnings == null ? other.warnings == null : warnings.equals(other.warnings)) && + (geneID == null ? other.geneID == null : geneID.equals(other.geneID)) && + (geneName == null ? other.geneName == null : geneName.equals(other.geneName)) && + (bioType == null ? other.bioType == null : bioType.equals(other.bioType)) && + (transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) && + (exonID == null ? other.exonID == null : exonID.equals(other.exonID)) && + (exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) && + isNonCodingGene == other.isNonCodingGene && + effect == other.effect && + (effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) && + (oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) && + (oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) && + (codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) && + (cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) && + (codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) && + (aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) && + (customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID)); + } + + public String toString() { + return "[Contig: " + contig + + " Position: " + position + + " Reference: " + reference + + " Change: " + change + + " Change Type: " + changeType + + " Zygosity: " + zygosity + + " Quality: " + quality + + " Coverage: " + coverage + + " Warnings: " + warnings + + " Gene ID: " + geneID + + " Gene Name: " + geneName + + " Bio Type: " + bioType + + " Transcript ID: " + transcriptID + + " Exon ID: " + exonID + + " Exon Rank: " + exonRank + + " Non-Coding Gene: " + isNonCodingGene + + " Effect: " + effect + + " Effect Extra Information: " + effectExtraInformation + + " Old/New AA: " + oldAndNewAA + + " Old/New Codon: " + oldAndNewCodon + + " Codon Num: " + codonNum + + " CDS Size: " + cdsSize + + " Codons Around: " + codonsAround + + " AAs Around: " + aasAround + + " Custom Interval ID: " + customIntervalID + + "]"; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java new file mode 100644 index 000000000..6d492565b --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +import org.apache.commons.io.input.ReaderInputStream; +import org.broad.tribble.TribbleException; +import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.LineReader; +import org.testng.Assert; +import org.testng.annotations.Test; + +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; + +import java.io.StringReader; + +public class SnpEffCodecUnitTest { + + @Test + public void testParseWellFormedSnpEffHeaderLine() { + String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" + + "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + + "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + + "AAs around\tCustom_interval_ID"; + + SnpEffCodec codec = new SnpEffCodec(); + LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine))); + String headerReturned = (String)codec.readHeader(reader); + + Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine); + } + + @Test(expectedExceptions = TribbleException.InvalidHeader.class) + public void testParseWrongNumberOfFieldsSnpEffHeaderLine() { + String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" + + "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + + "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + + "AAs around"; + + SnpEffCodec codec = new SnpEffCodec(); + LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine))); + codec.readHeader(reader); + } + + @Test(expectedExceptions = TribbleException.InvalidHeader.class) + public void testParseMisnamedColumnSnpEffHeaderLine() { + String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" + + "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + + "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + + "AAs around\tCustom_interval_ID"; + + SnpEffCodec codec = new SnpEffCodec(); + LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine))); + codec.readHeader(reader); + } + + @Test + public void testParseSimpleEffectSnpEffLine() { + String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 69428l, + "T", + "G", + ChangeType.SNP, + Zygosity.Hom, + 6049.69, + 61573l, + null, + "ENSG00000177693", + "OR4F5", + "mRNA", + "ENST00000326183", + "exon_1_69055_70108", + 1, + false, + EffectType.NON_SYNONYMOUS_CODING, + null, + "F/C", + "TTT/TGT", + 113, + 918, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test + public void testParseNonCodingRegionSnpEffLine() { + String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" + + "RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" + + "L/V\tCTA/GTA\t272\t952\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 1337592l, + "G", + "C", + ChangeType.SNP, + Zygosity.Hom, + 1935.52, + 21885l, + null, + "ENSG00000250188", + "RP4-758J18.5", + "mRNA", + "ENST00000514958", + "exon_1_1337454_1338076", + 2, + true, + EffectType.NON_SYNONYMOUS_CODING, + null, + "L/V", + "CTA/GTA", + 272, + 952, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test + public void testParseExtraEffectInformationSnpEffLine() { + String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" + + "mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 879537l, + "T", + "C", + ChangeType.SNP, + Zygosity.Hom, + 341.58, + 13733l, + null, + "ENSG00000187634", + "SAMD11", + "mRNA", + "ENST00000341065", + null, + null, + false, + EffectType.UTR_3_PRIME, + "4 bases from transcript end", + null, + null, + null, + null, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test + public void testParseMultiEffectSnpEffLine() { + String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" + + "ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 901901l, + "C", + "T", + ChangeType.SNP, + Zygosity.Hom, + 162.91, + 4646l, + null, + "ENSG00000187583", + "PLEKHN1", + "mRNA", + "ENST00000379410", + "exon_1_901877_901994", + 1, + false, + EffectType.START_GAINED, + "ATG, UTR_5_PRIME: 11 bases from TSS", + null, + null, + null, + null, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) + public void testParseWrongNumberOfFieldsSnpEffLine() { + String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t"; + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine); + } + + @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) + public void testParseBlankEffectFieldSnpEffLine() { + String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t"; + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine); + } + + @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) + public void testParseInvalidNumericFieldSnpEffLine() { + String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";; + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine); + } +} From f8ad91b16fc019f95a2aa7385588dd6e5f8584dc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 16:57:38 -0400 Subject: [PATCH 148/186] Reverting a bunch of bad -B type drops --- .../phasing/ReadBackedPhasingWalker.java | 19 +++-- .../VariantAnnotatorIntegrationTest.java | 4 +- ...astaAlternateReferenceIntegrationTest.java | 2 +- .../UnifiedGenotyperPerformanceTest.java | 2 +- ...RealignerTargetCreatorIntegrationTest.java | 2 +- ...ergeAndMatchHaplotypesIntegrationTest.java | 4 +- .../phasing/MergeMNPsIntegrationTest.java | 2 +- ...gatingAlternateAllelesIntegrationTest.java | 2 +- .../PhaseByTransmissionIntegrationTest.java | 2 +- .../ReadBackedPhasingIntegrationTest.java | 2 +- .../RecalibrationWalkersIntegrationTest.java | 4 +- .../RecalibrationWalkersPerformanceTest.java | 4 +- .../VariantEvalIntegrationTest.java | 76 +++++++++---------- ...ntRecalibrationWalkersIntegrationTest.java | 10 +-- .../VariantContextIntegrationTest.java | 4 +- 15 files changed, 71 insertions(+), 68 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 8e62999b5..7df55b4cd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -23,9 +23,7 @@ */ package org.broadinstitute.sting.gatk.walkers.phasing; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; @@ -64,6 +62,13 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr public class ReadBackedPhasingWalker extends RodWalker { private static final boolean DEBUG = false; + /** + * The VCF file we are phasing variants from. + * + * All heterozygous variants found in this VCF file will be phased, where possible + */ + @Input(fullName="variants", shortName = "V", doc="Phase variants from this VCF file", required=true) + public RodBinding variants; @Output(doc = "File to which variants should be written", required = true) protected VCFWriter writer = null; @@ -98,8 +103,6 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(rodName)); - Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(rodName).getGenotypeSamples() : samplesToPhase); + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); + Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(variants.getName()).getGenotypeSamples() : samplesToPhase); writer.writeHeader(new VCFHeader(hInfo, samples)); } @@ -204,7 +207,7 @@ public class ReadBackedPhasingWalker extends RodWalker unprocessedList = new LinkedList(); - for (VariantContext vc : tracker.getValues(VariantContext.class, rodName, context.getLocation())) { + for (VariantContext vc : tracker.getValues(variants, context.getLocation())) { if (samplesToPhase != null) vc = reduceVCToSamples(vc, samplesToPhase); if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 09da337ce..fc50f67f2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -102,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:compH3 " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -110,7 +110,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:foo " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, + baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index cedee826c..be2f3cdaa 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -24,7 +24,7 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaReference", spec1b); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); executeTest("testFastaAlternateReferenceIndels", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index 132e6aeb7..d271d78b1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -B:dbsnp " + b36dbSNP129 + + " -B:dbsnp,VCF " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index 8fa15700d..aabf01415 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -23,7 +23,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test dbsnp", spec2); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s", + "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s", 1, Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96")); executeTest("test rods only", spec3); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java index d27c63759..21435dd7d 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -16,8 +16,8 @@ public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { buildCommandLine( "-T MergeAndMatchHaplotypes", "-R " + b37KGReference, - "-B:pbt " + fundamentalTestPBTVCF, - "-B:rbp " + fundamentalTestRBPVCF, + "-B:pbt,VCF " + fundamentalTestPBTVCF, + "-B:rbp,VCF " + fundamentalTestRBPVCF, "-o %s" ), 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java index ef6fd0d7f..c88eac149 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeMNPsIntegrationTest.java @@ -10,7 +10,7 @@ public class MergeMNPsIntegrationTest extends WalkerTest { public static String baseTestString(String reference, String VCF, int maxDistMNP) { return "-T MergeMNPs" + " -R " + reference + - " -B:variant " + validationDataLocation + VCF + + " -B:variant,VCF " + validationDataLocation + VCF + " --maxGenomicDistanceForMNP " + maxDistMNP + " -o %s" + " -NO_HEADER"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java index b9c291b61..f855c1dd3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeSegregatingAlternateAllelesIntegrationTest.java @@ -10,7 +10,7 @@ public class MergeSegregatingAlternateAllelesIntegrationTest extends WalkerTest public static String baseTestString(String reference, String VCF, int maxDist) { return "-T MergeSegregatingAlternateAlleles" + " -R " + reference + - " -B:variant " + validationDataLocation + VCF + + " -B:variant,VCF " + validationDataLocation + VCF + " --maxGenomicDistance " + maxDist + " -o %s" + " -NO_HEADER"; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 4a205c85f..59750e18f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -16,7 +16,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-T PhaseByTransmission", "-NO_HEADER", "-R " + b37KGReference, - "-B:variant " + fundamentalTestVCF, + "-B:variant,VCF " + fundamentalTestVCF, "-f NA12892+NA12891=NA12878", "-o %s" ), diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 2b3122e77..1bf3e579f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { return "-T ReadBackedPhasing" + " -R " + reference + " -I " + validationDataLocation + reads + - " -B:variant " + validationDataLocation + VCF + + " -B:variant,VCF " + validationDataLocation + VCF + " --cacheWindowSize " + cacheWindowSize + " --maxPhaseSites " + maxPhaseSites + " --phaseQualityThresh " + phaseQualityThresh + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 8334c99ec..74f803ac6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -236,7 +236,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp " + validationDataLocation + "vcfexample3.vcf" + + " -B:dbsnp,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + " -L 1:10,000,000-10,200,000" + @@ -284,7 +284,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:anyNameABCD " + validationDataLocation + "vcfexample3.vcf" + + " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + " -B:dbsnp,vcf " + b36dbSNP129 + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java index 45d104862..43ea401f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersPerformanceTest.java @@ -16,7 +16,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L chr1:1-50,000,000" + " -standard" + " -OQ" + - " -B:dbsnp " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); @@ -31,7 +31,7 @@ public class RecalibrationWalkersPerformanceTest extends WalkerTest { " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + " -standard" + " -OQ" + - " -B:dbsnp " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -recalFile /dev/null" + moreArgs, 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 401fecb44..8fa5f0c29 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -20,8 +20,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -40,8 +40,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -61,8 +61,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -83,8 +83,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -104,8 +104,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -125,8 +125,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -146,8 +146,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -167,8 +167,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -190,8 +190,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -215,7 +215,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:eval " + fundamentalTestVCF, + "-B:eval,VCF " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -232,9 +232,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testSelect1() { String extraArgs = "-L 1:1-10,000,000"; String tests = cmdRoot + - " -B:dbsnp " + b36dbSNP129 + - " -B:eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " -B:comp_genotypes " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + " -B:dbsnp,VCF " + b36dbSNP129 + + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", 1, Arrays.asList("14054badcd89b24c2375e1d09918f681")); executeTestParallel("testSelect1", spec); @@ -244,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testVEGenotypeConcordance() { String vcfFile = "GenotypeConcordanceEval.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval " + validationDataLocation + vcfFile + " -B:comp " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, Arrays.asList("96f27163f16bb945f19c6623cd6db34e")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); @@ -252,7 +252,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); executeTestParallel("testCompVsEvalAC",spec); } @@ -280,8 +280,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp " + b37dbSNP132 + - " -B:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:dbsnp,VCF " + b37dbSNP132 + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0897dfba2f4a245faddce38000555cce")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); @@ -291,9 +291,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testMultipleEvalTracksWithoutGenotypes() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp " + b37dbSNP132 + - " -B:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -B:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " -B:dbsnp,VCF " + b37dbSNP132 + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ead3602e14ec2944b5d9e4dacc08c819")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); @@ -305,9 +305,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval" + " -R " + b37KGReference + - " -B:comp " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + - " -B:eval " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + - " -B:dbsnp " + dbsnp + + " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " -B:dbsnp,VCF " + dbsnp + " -L 20:10000000-10100000" + " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; @@ -324,8 +324,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestSNPsVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", "-sn HG00625", @@ -342,8 +342,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestSNPsOneSampleVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, "-noEV", "-EV CompOverlap", "-noST", @@ -363,8 +363,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp " + b37dbSNP132, - "-B:eval " + fundamentalTestSNPsVCF, + "-B:dbsnp,VCF " + b37dbSNP132, + "-B:eval,VCF " + fundamentalTestSNPsVCF, "-noEV", "-EV CountVariants", "-noST", diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index f6c858135..3ac7e3785 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -41,11 +41,11 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b37KGReference + - " -B:dbsnp,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + - " -B:hapmap,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + - " -B:omni,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + + " -B:dbsnp,VCF,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + + " -B:hapmap,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + + " -B:omni,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + " -T VariantRecalibrator" + - " -B:input " + params.inVCF + + " -B:input,VCF " + params.inVCF + " -L 20:1,000,000-40,000,000" + " -an QD -an HaplotypeScore -an HRun" + " -percentBad 0.07" + @@ -64,7 +64,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -T ApplyRecalibration" + " -L 20:12,000,000-30,000,000" + " -NO_HEADER" + - " -B:input " + params.inVCF + + " -B:input,VCF " + params.inVCF + " -o %s" + " -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) + " -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null), diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 9655b514a..7cdb6af95 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantContextIntegrationTest extends WalkerTest { private static String root = cmdRoot + " -L 1:1-1,000,000 -B:dbsnp,vcf " + b36dbSNP129 + - " -B:vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; + " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; private static final class VCITTest extends TestDataProvider { String args, md5; @@ -58,7 +58,7 @@ public class VariantContextIntegrationTest extends WalkerTest { public void testToVCF() { // this really just tests that we are seeing the same number of objects over all of chr1 - WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", + WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", 2, // just one output file Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); From 80924d24de1372d769856df3e2721c7b01107dd7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 8 Aug 2011 19:26:27 -0400 Subject: [PATCH 149/186] Single positional arguments are now treated as names unless they actually match a tribble feature --- .../commandline/ArgumentTypeDescriptor.java | 53 +++++++++++-------- .../gatk/EngineFeaturesIntegrationTest.java | 37 +------------ .../ReadBackedPhasingIntegrationTest.java | 2 +- .../CombineVariantsIntegrationTest.java | 2 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 4 +- 5 files changed, 36 insertions(+), 62 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index ebed68022..0fb8bbd3a 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -317,39 +317,46 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { String tribbleType = null; Tags tags = getArgumentTags(matches); // must have one or two tag values here - if ( tags.getPositionalTags().size() == 2 ) { // -X:name,type style + if ( tags.getPositionalTags().size() > 2 ) { + throw new UserException.CommandLineException( + String.format("Unexpected number of positional tags for argument %s : %s. " + + "Rod bindings only suport -X:type and -X:name,type argument styles", + value, source.field.getName())); + } if ( tags.getPositionalTags().size() == 2 ) { + // -X:name,type style name = tags.getPositionalTags().get(0); tribbleType = tags.getPositionalTags().get(1); } else { - if ( tags.getPositionalTags().size() == 1 ) { - // -X:type style is a type when we cannot determine the type dynamically - tribbleType = tags.getPositionalTags().get(0); + // case with 0 or 1 positional tags + FeatureManager manager = new FeatureManager(); + + // -X:type style is a type when we cannot determine the type dynamically + String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null; + if ( tag1 != null ) { + if ( manager.getByName(tag1) != null ) // this a type + tribbleType = tag1; + else + name = tag1; } - // try to determine the file type dynamically - FeatureManager manager = new FeatureManager(); - File file = new File(value); - if ( file.canRead() && file.isFile() ) { - FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); - if ( featureDescriptor != null ) { - tribbleType = featureDescriptor.getName(); - logger.warn("Dynamically determined type of " + file + " to be " + tribbleType); - - if ( tags.getPositionalTags().size() == 1 ) { - // -X:type style is a name when we can determine the type dynamically - name = tags.getPositionalTags().get(0); + if ( tribbleType == null ) { + // try to determine the file type dynamically + File file = new File(value); + if ( file.canRead() && file.isFile() ) { + FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file); + if ( featureDescriptor != null ) { + tribbleType = featureDescriptor.getName(); + logger.warn("Dynamically determined type of " + file + " to be " + tribbleType); } } } - - // now, if we haven't found a type - if ( tribbleType == null ) - throw new UserException.CommandLineException( - String.format("Unexpected number of positional tags for argument %s : %s. " + - "Rod bindings only suport -X:type and -X:name,type argument styles", - value, source.field.getName())); } + if ( tribbleType == null ) // error handling + throw new UserException.CommandLineException( + String.format("Could not parse argument %s with value %s", + defaultDefinition.fullName, value)); + Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index cdca08abd..80a8d2fa4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -24,26 +24,16 @@ package org.broadinstitute.sting.gatk; -import org.broad.tribble.Feature; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.testng.Assert; import org.testng.annotations.Test; -import java.util.List; - /** * */ public class EngineFeaturesIntegrationTest extends WalkerTest { private void testBadRODBindingInput(String type, String name, Class c) { - WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variants:" + type + " " + WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variants:variants," + type + " " + b37dbSNP132 + " -R " + b37KGReference + " -o %s", 1, c); executeTest(name, spec); @@ -64,27 +54,4 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { @Test() private void testBadRODBindingInputTypeUnknownType() { testBadRODBindingInput("bedXXX", "Unknown input to VCF expecting walker", UserException.UnknownTribbleType.class); } -} - -//class TestRodBindings extends RodWalker { -// @Input(fullName="req", required=true) -// public RodBinding required; -// -// @Input(fullName="optional", required=false) -// public RodBinding optional = RodBinding.makeUnbound(Feature.class); -// -// @Input(fullName="rodList", shortName="RL", doc="A list of ROD types that we will convert to a table", required=true) -// public List> variantsList; -// -// public void initialize() { -// // bound values -// Assert.assertEquals(required.isBound(), true); -// -// -// System.exit(0); -// } -// -// public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; } -// public Integer reduceInit() { return 0; } -// public Integer reduce(Integer counter, Integer sum) { return counter + sum; } -//} \ No newline at end of file +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 1bf3e579f..3566ecd05 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { return "-T ReadBackedPhasing" + " -R " + reference + " -I " + validationDataLocation + reads + - " -B:variant,VCF " + validationDataLocation + VCF + + " --variants " + validationDataLocation + VCF + " --cacheWindowSize " + cacheWindowSize + " --maxPhaseSites " + maxPhaseSites + " --phaseQualityThresh " + phaseQualityThresh + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index f52504ccb..4abf0a102 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -72,7 +72,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { public void combinePLs(String file1, String file2, String md5) { WalkerTestSpec spec = new WalkerTestSpec( - "-T CombineVariants -NO_HEADER -o %s -R " + b36KGReference + " -priority v1,v2 -B:v1,VCF " + validationDataLocation + file1 + " -B:v2,VCF " + validationDataLocation + file2, + "-T CombineVariants -NO_HEADER -o %s -R " + b36KGReference + " -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 " + validationDataLocation + file2, 1, Arrays.asList(md5)); executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index a89c0315c..e758ce0a2 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -17,11 +17,11 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; - String test1 = baseCommand + "-T VariantAnnotator --variants:vcf " + testVCF + " -BTI variants"; + String test1 = baseCommand + "-T VariantAnnotator --variants " + testVCF + " -BTI variants"; WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); - String test2 = baseCommand + "-T VariantsToVCF --variants:vcf " + result.get(0).getAbsolutePath(); + String test2 = baseCommand + "-T VariantsToVCF --variants " + result.get(0).getAbsolutePath(); WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); executeTest("Test Variants To VCF from new output", spec2); } From a13bc7b9290a914f788a10e3bd451e3788082c70 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 20:01:24 -0400 Subject: [PATCH 150/186] Added an integration test for the SnpEff annotation support, as well as some extra safety checks and comments. --- .../sting/gatk/walkers/annotator/SnpEff.java | 22 ++++++-- .../utils/codecs/snpEff/SnpEffCodec.java | 49 ++++++++++++++++++ .../utils/codecs/snpEff/SnpEffConstants.java | 8 +++ .../utils/codecs/snpEff/SnpEffFeature.java | 51 +++++++++++++++++-- .../VariantAnnotatorIntegrationTest.java | 11 ++++ 5 files changed, 134 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index c307d4cc0..b9b97e154 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants; import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; @@ -38,9 +38,22 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { +/** + * A set of genomic annotations based on the output of the SnpEff variant effect predictor tool + * (http://snpeff.sourceforge.net/). + * + * For each variant, chooses one of the effects of highest biological impact from the SnpEff + * output file (which must be bound to an RMD track named "SnpEff"), and adds annotations + * on that effect. + * + * The possible biological effects and their associated impacts are defined in the class: + * org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants + * + * @author David Roazen + */ +public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation { - // SnpEff field keys: + // SnpEff annotation key names: public static final String GENE_ID_KEY = "GENE_ID"; public static final String GENE_NAME_KEY = "GENE_NAME"; public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID"; @@ -55,11 +68,14 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { public static final String CODON_NUM_KEY = "CODON_NUM"; public static final String CDS_SIZE_KEY = "CDS_SIZE"; + // Name of the RMD track bound to the raw SnpEff-generated output file: public static final String RMD_TRACK_NAME = "SnpEff"; public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); + // Add only annotations for one of the most biologically-significant effects as defined in + // the SnpEffConstants class: SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); return generateAnnotations(mostSignificantEffect); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index dfe1f5f1a..827df16bb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -34,6 +34,40 @@ import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygos import java.io.IOException; +/** + * Codec for decoding the output format of the SnpEff variant effect predictor tool + * (http://snpeff.sourceforge.net/). + * + * This format has 23 tab-delimited fields: + * + * Chromosome + * Position + * Reference + * Change + * Change Type: {SNP, MNP, INS, DEL} + * Zygosity: {Hom, Het} + * Quality + * Coverage + * Warnings + * Gene ID + * Gene Name + * Bio Type + * Transcript ID + * Exon ID + * Exon Rank + * Effect + * Old/New Amino Acid + * Old/New Codon + * Codon Num + * CDS Size + * Codons Around + * Amino Acids Around + * Custom Interval ID + * + * We treat all except the Chromosome, Position, and Effect fields as optional. + * + * @author David Roazen + */ public class SnpEffCodec implements FeatureCodec { public static final int EXPECTED_NUMBER_OF_FIELDS = 23; @@ -64,9 +98,13 @@ public class SnpEffCodec implements FeatureCodec { "AAs around", "Custom_interval_ID" }; + + // The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line: public static final int[] REQUIRED_FIELDS = { 0, 1, 15 }; + public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE"; + public Feature decodeLoc ( String line ) { return decode(line); } @@ -101,6 +139,11 @@ public class SnpEffCodec implements FeatureCodec { Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]); boolean isNonCodingGene = isNonCodingGene(tokens[15]); + + // Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present, + // otherwise split it into two subfields. We need this limit to prevent the extra effect-related information + // in the final field (when present) from being inappropriately tokenized: + int effectFieldTokenLimit = isNonCodingGene ? 3 : 2; String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit); EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene); @@ -150,6 +193,9 @@ public class SnpEffCodec implements FeatureCodec { private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) { String effectName = ""; + // If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield, + // otherwise it will be in the first subfield: + if ( effectFieldTokens.length > 1 && isNonCodingGene ) { effectName = effectFieldTokens[1].trim(); } @@ -161,6 +207,9 @@ public class SnpEffCodec implements FeatureCodec { } private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) { + + // The extra effect-related information, if present, will always be the last subfield: + if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) { return effectFieldTokens[effectFieldTokens.length - 1].trim(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java index f226c3523..270db470f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java @@ -24,8 +24,14 @@ package org.broadinstitute.sting.utils.codecs.snpEff; +/** + * A set of constants associated with the SnpEff codec. + * + * @author David Roazen + */ public class SnpEffConstants { + // Possible SnpEff biological effects and their associated impacts: public enum EffectType { START_GAINED (EffectImpact.HIGH), START_LOST (EffectImpact.HIGH), @@ -93,6 +99,7 @@ public class SnpEffConstants { } } + // The kinds of variants supported by the SnpEff output format: public enum ChangeType { SNP, MNP, @@ -100,6 +107,7 @@ public class SnpEffConstants { DEL } + // Possible zygosities of SnpEff variants: public enum Zygosity { Hom, Het diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java index 4a68d7cf1..2f120b7d2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -26,15 +26,26 @@ package org.broadinstitute.sting.utils.codecs.snpEff; import org.broad.tribble.Feature; +import java.util.NoSuchElementException; + import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; +/** + * Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output. + * + * Many fields are optional, and missing values are represented by nulls. You should always call the + * hasX() method before calling the corresponding getX() method. Required fields can never be null + * and do not have a hasX() method. + * + * @author David Roazen + */ public class SnpEffFeature implements Feature { - private String contig; - private long position; + private String contig; // REQUIRED FIELD + private long position; // REQUIRED FIELD private String reference; private String change; private ChangeType changeType; @@ -48,8 +59,8 @@ public class SnpEffFeature implements Feature { private String transcriptID; private String exonID; private Integer exonRank; - private boolean isNonCodingGene; - private EffectType effect; + private boolean isNonCodingGene; // REQUIRED FIELD + private EffectType effect; // REQUIRED FIELD private String effectExtraInformation; private String oldAndNewAA; private String oldAndNewCodon; @@ -85,6 +96,10 @@ public class SnpEffFeature implements Feature { String aasAround, String customIntervalID ) { + if ( contig == null || effect == null ) { + throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields"); + } + this.contig = contig; this.position = position; this.reference = reference; @@ -113,6 +128,10 @@ public class SnpEffFeature implements Feature { } public boolean isHigherImpactThan ( SnpEffFeature other ) { + + // If one effect is in a non-coding gene and the other is not, the effect NOT in the + // non-coding gene has higher impact: + if ( ! isNonCodingGene() && other.isNonCodingGene() ) { return true; } @@ -120,6 +139,9 @@ public class SnpEffFeature implements Feature { return false; } + // Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts + // of the effects themselves as defined in the SnpEffConstants class: + return getEffectImpact().isHigherImpactThan(other.getEffectImpact()); } @@ -140,6 +162,7 @@ public class SnpEffFeature implements Feature { } public String getReference() { + if ( reference == null ) throw new NoSuchElementException("This feature has no reference field"); return reference; } @@ -148,6 +171,7 @@ public class SnpEffFeature implements Feature { } public String getChange() { + if ( change == null ) throw new NoSuchElementException("This feature has no change field"); return change; } @@ -156,6 +180,7 @@ public class SnpEffFeature implements Feature { } public ChangeType getChangeType() { + if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field"); return changeType; } @@ -164,6 +189,7 @@ public class SnpEffFeature implements Feature { } public Zygosity getZygosity() { + if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field"); return zygosity; } @@ -172,6 +198,7 @@ public class SnpEffFeature implements Feature { } public Double getQuality() { + if ( quality == null ) throw new NoSuchElementException("This feature has no quality field"); return quality; } @@ -180,6 +207,7 @@ public class SnpEffFeature implements Feature { } public Long getCoverage() { + if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field"); return coverage; } @@ -188,6 +216,7 @@ public class SnpEffFeature implements Feature { } public String getWarnings() { + if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field"); return warnings; } @@ -196,6 +225,7 @@ public class SnpEffFeature implements Feature { } public String getGeneID() { + if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field"); return geneID; } @@ -204,6 +234,7 @@ public class SnpEffFeature implements Feature { } public String getGeneName() { + if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field"); return geneName; } @@ -212,6 +243,7 @@ public class SnpEffFeature implements Feature { } public String getBioType() { + if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field"); return bioType; } @@ -220,6 +252,7 @@ public class SnpEffFeature implements Feature { } public String getTranscriptID() { + if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field"); return transcriptID; } @@ -228,6 +261,7 @@ public class SnpEffFeature implements Feature { } public String getExonID() { + if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field"); return exonID; } @@ -236,6 +270,7 @@ public class SnpEffFeature implements Feature { } public Integer getExonRank() { + if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field"); return exonRank; } @@ -256,6 +291,7 @@ public class SnpEffFeature implements Feature { } public String getEffectExtraInformation() { + if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field"); return effectExtraInformation; } @@ -264,6 +300,7 @@ public class SnpEffFeature implements Feature { } public String getOldAndNewAA() { + if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field"); return oldAndNewAA; } @@ -272,6 +309,7 @@ public class SnpEffFeature implements Feature { } public String getOldAndNewCodon() { + if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field"); return oldAndNewCodon; } @@ -280,6 +318,7 @@ public class SnpEffFeature implements Feature { } public Integer getCodonNum() { + if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field"); return codonNum; } @@ -288,6 +327,7 @@ public class SnpEffFeature implements Feature { } public Integer getCdsSize() { + if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field"); return cdsSize; } @@ -296,6 +336,7 @@ public class SnpEffFeature implements Feature { } public String getCodonsAround() { + if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field"); return codonsAround; } @@ -304,6 +345,7 @@ public class SnpEffFeature implements Feature { } public String getAasAround() { + if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field"); return aasAround; } @@ -312,6 +354,7 @@ public class SnpEffFeature implements Feature { } public String getCustomIntervalID() { + if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field"); return customIntervalID; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index e6300e6c9..5dc7299a9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -125,4 +125,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing lookup vcf tabix vs. vcf tribble", spec); } } + + @Test + public void testSnpEffAnnotations() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -o %s -A SnpEff -B:variant,VCF " + validationDataLocation + "/1000G.exomes.vcf " + + "-B:SnpEff,SnpEff " + validationDataLocation + "/snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out" + " -L 1", + 1, + Arrays.asList("5fe3644744d3c084a179c3d204555333") + ); + executeTest("Testing SnpEff annotations", spec); + } } From 28d8c8fcbc5487cd1525118cc38d8a9dc6f26663 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 21:07:13 -0400 Subject: [PATCH 151/186] Modified the SnpEff integration test to run on a much smaller interval. --- .../walkers/annotator/VariantAnnotatorIntegrationTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 5dc7299a9..173c57a15 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -129,10 +129,11 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - "-T VariantAnnotator -R " + b37KGReference + " -o %s -A SnpEff -B:variant,VCF " + validationDataLocation + "/1000G.exomes.vcf " + - "-B:SnpEff,SnpEff " + validationDataLocation + "/snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out" + " -L 1", + "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff -B:variant,VCF " + + validationDataLocation + "1000G.exomes.vcf -B:SnpEff,SnpEff " + validationDataLocation + + "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000", 1, - Arrays.asList("5fe3644744d3c084a179c3d204555333") + Arrays.asList("c08648a078368c80530bff004b3157f1") ); executeTest("Testing SnpEff annotations", spec); } From 2efa37661971bc3bf05132a12664ed5058136c97 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 23:23:42 -0400 Subject: [PATCH 152/186] Made the necessary changes to get SnpEff support working with the new rodbinding system. --- .../sting/gatk/walkers/annotator/SnpEff.java | 31 +++++++++++-------- .../walkers/annotator/VariantAnnotator.java | 4 +++ .../VariantAnnotatorIntegrationTest.java | 4 +-- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index b9b97e154..cac59c8bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -43,8 +44,8 @@ import java.util.*; * (http://snpeff.sourceforge.net/). * * For each variant, chooses one of the effects of highest biological impact from the SnpEff - * output file (which must be bound to an RMD track named "SnpEff"), and adds annotations - * on that effect. + * output file (which must be provided on the command line via --snpEffFile:SnpEff ), + * and adds annotations on that effect. * * The possible biological effects and their associated impacts are defined in the class: * org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants @@ -68,28 +69,32 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio public static final String CODON_NUM_KEY = "CODON_NUM"; public static final String CDS_SIZE_KEY = "CDS_SIZE"; - // Name of the RMD track bound to the raw SnpEff-generated output file: - public static final String RMD_TRACK_NAME = "SnpEff"; - public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { - List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); + List features = tracker.getValues(Feature.class); // Add only annotations for one of the most biologically-significant effects as defined in // the SnpEffConstants class: - SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); + SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features); + + if ( mostSignificantEffect == null ) { + return null; + } + return generateAnnotations(mostSignificantEffect); } - private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { + private SnpEffFeature getMostSignificantEffect ( List features ) { SnpEffFeature mostSignificantEffect = null; - for ( Object feature : snpEffFeatures ) { - SnpEffFeature snpEffFeature = (SnpEffFeature)feature; + for ( Feature feature : features ) { + if ( feature instanceof SnpEffFeature ) { + SnpEffFeature snpEffFeature = (SnpEffFeature)feature; - if ( mostSignificantEffect == null || - snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { + if ( mostSignificantEffect == null || + snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { - mostSignificantEffect = snpEffFeature; + mostSignificantEffect = snpEffFeature; + } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 85035ba93..ef408fae3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -55,6 +56,9 @@ public class VariantAnnotator extends RodWalker { @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; + @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) + public RodBinding snpEffFile; + @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index fe7e8b1d8..cbfb3cd0b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -129,8 +129,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff -B:variant,VCF " + - validationDataLocation + "1000G.exomes.vcf -B:SnpEff,SnpEff " + validationDataLocation + + "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variants:VCF " + + validationDataLocation + "1000G.exomes.vcf --snpEffFile:SnpEff " + validationDataLocation + "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000", 1, Arrays.asList("c08648a078368c80530bff004b3157f1") From 2db6225c5364b7d4b81fbcd278c9e4ffa228c1c0 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 9 Aug 2011 00:04:42 -0400 Subject: [PATCH 153/186] A read filter that sets all mapping qualities to a given value Pacbio has decided to assign 255 to the MQ of all their reads since they claim their aligner does not produce a number equivalent to a mapping quality. Despite much back and forth, they are dead set on not using this field, so if we want to use their bams, we will need to override that. This filter does just that. Replacing all values with a given one. Default is 60. --- .../filters/ReassignMappingQualityFilter.java | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java new file mode 100644 index 000000000..50a1384fa --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; + +/** + * A read filter (transformer) that sets all reads mapping quality to a given value. + * + *

+ * If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set + * all your mapping qualities to a given value. Default being 60. + *

+ * + * + *

Input

+ *

+ * BAM file(s) + *

+ * + * + *

Output

+ *

+ * BAM file(s) with all reads mapping qualities reassigned + *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -rf ReassignMappingQuality
+ *      -DMQ 35
+ *  
+ * + * @author carneiro + * @since 8/8/11 + */ + +public class ReassignMappingQualityFilter extends ReadFilter { + + @Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false) + public int defaultMappingQuality = 60; + + public boolean filterOut(SAMRecord rec) { + rec.setMappingQuality(defaultMappingQuality); + return false; + } +} + From cb28875c2a07ec706b774ac3a3bb91a7fe54e718 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Tue, 9 Aug 2011 00:46:39 -0400 Subject: [PATCH 154/186] Updated rod binding syntax usage on CombineVariants from .rodBind to .variants. --- .../sting/queue/extensions/gatk/VcfGatherFunction.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index 27e186585..fcb3e690a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -43,7 +43,7 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { this.intervals = this.originalGATK.intervals this.intervalsString = this.originalGATK.intervalsString - this.rodBind = this.gatherParts.zipWithIndex map { case (input, index) => new RodBind("input"+index, "VCF", input) } + this.variants = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } this.rod_priority_list = (0 until this.gatherParts.size).map("input"+_).mkString(",") this.out = this.originalOutput this.assumeIdenticalSamples = true From 70b3daf689b5df42b8b99f6b90891a3d7a501e18 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 03:03:43 -0400 Subject: [PATCH 155/186] VariantsToVCF is up and running again; integration tests are reenabled (and added one for dbSNP).ant --- .../gatk/refdata/VariantContextAdaptors.java | 31 +--- .../gatk/refdata/features/DbSNPHelper.java | 17 +- .../annotator/VariantAnnotatorEngine.java | 2 +- .../walkers/variantutils/VariantsToVCF.java | 164 ++++++++---------- .../utils/codecs/hapmap/HapMapFeature.java | 1 + .../utils/variantcontext/VariantContext.java | 4 + .../VariantsToVCFIntegrationTest.java | 155 +++++++++-------- 7 files changed, 187 insertions(+), 187 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index d6f8bab9b..216edaf87 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -112,7 +112,7 @@ public class VariantContextAdaptors { alleles.add(refAllele); // add all of the alt alleles - boolean sawNullAllele = false; + boolean sawNullAllele = refAllele.isNull(); for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { if ( ! Allele.acceptableAlleleBases(alt) ) { //System.out.printf("Excluding dbsnp record %s%n", dbsnp); @@ -133,7 +133,7 @@ public class VariantContextAdaptors { Byte refBaseForIndel = new Byte(ref.getBases()[index]); Map genotypes = null; - VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel); + VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel); return vc; } else return null; // can't handle anything else @@ -163,16 +163,6 @@ public class VariantContextAdaptors { @Override public Class getAdaptableFeatureType() { return GeliTextFeature.class; } - /** - * convert to a Variant Context, given: - * @param name the name of the ROD - * @param input the Rod object, in this case a RodGeliText - * @return a VariantContext object - */ -// VariantContext convert(String name, Object input) { -// return convert(name, input, null); -// } - /** * convert to a Variant Context, given: * @param name the name of the ROD @@ -238,16 +228,6 @@ public class VariantContextAdaptors { @Override public Class getAdaptableFeatureType() { return HapMapFeature.class; } - /** - * convert to a Variant Context, given: - * @param name the name of the ROD - * @param input the Rod object, in this case a RodGeliText - * @return a VariantContext object - */ -// VariantContext convert(String name, Object input) { -// return convert(name, input, null); -// } - /** * convert to a Variant Context, given: * @param name the name of the ROD @@ -262,6 +242,11 @@ public class VariantContextAdaptors { HapMapFeature hapmap = (HapMapFeature)input; + int index = hapmap.getStart() - ref.getWindow().getStart(); + if ( index < 0 ) + return null; // we weren't given enough reference context to create the VariantContext + Byte refBaseForIndel = new Byte(ref.getBases()[index]); + HashSet alleles = new HashSet(); Allele refSNPAllele = Allele.create(ref.getBase(), true); int deletionLength = -1; @@ -320,7 +305,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) end += deletionLength; - VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs); + VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel); return vc; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java index f62a157f0..e6e7a7588 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/features/DbSNPHelper.java @@ -59,7 +59,7 @@ public class DbSNPHelper { return dbsnp; } - public static String rsIDOfFirstRealSNP(List featureList) { + public static String rsIDOfFirstRealSNP(List featureList, boolean deleteMe) { if (featureList == null) return null; @@ -81,6 +81,21 @@ public class DbSNPHelper { return rsID; } + public static String rsIDOfFirstRealSNP(List VCs) { + if ( VCs == null ) + return null; + + String rsID = null; + for ( VariantContext vc : VCs ) { + if ( vc.isSNP() ) { + rsID = vc.getID(); + break; + } + } + + return rsID; + } + public static String rsIDOfFirstRealIndel(List featureList) { if (featureList == null) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 8636736bf..78207cb86 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -155,7 +155,7 @@ public class VariantAnnotatorEngine { String rsID = null; if (vc.isSNP()) - rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME), true); else if (vc.isIndel()) rsID = DbSNPHelper.rsIDOfFirstRealIndel(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); infoAnnotations.put(VCFConstants.DBSNP_KEY, rsID != null ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 07c5e71a6..497d98b99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -27,15 +27,12 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.Feature; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; @@ -43,6 +40,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -52,6 +50,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; +import java.io.File; import java.util.*; /** @@ -63,10 +62,13 @@ public class VariantsToVCF extends RodWalker { @Output(doc="File to which variants should be written",required=true) protected VCFWriter baseWriter = null; - private SortingVCFWriter vcfwriter; // needed because hapmap indel records move + private SortingVCFWriter vcfwriter; // needed because hapmap/dbsnp indel records move - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + @Input(fullName="variant", shortName = "V", doc="Input variant file", required=true) + public RodBinding variants; + + @Input(fullName="dbsnp", shortName = "D", doc="dbSNP VCF for populating rsIDs", required=false) + public RodBinding dbsnp; @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) protected String sampleName = null; @@ -77,10 +79,6 @@ public class VariantsToVCF extends RodWalker { private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; - // Don't allow mixed types for now - private EnumSet ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, - VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL, VariantContext.Type.MNP); - // for dealing with indels in hapmap CloseableIterator dbsnpIterator = null; @@ -92,128 +90,108 @@ public class VariantsToVCF extends RodWalker { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - String rsID = DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); + String rsID = dbsnp == null ? null : DbSNPHelper.rsIDOfFirstRealSNP(tracker.getValues(dbsnp, context.getLocation())); Collection contexts = getVariantContexts(tracker, ref); for ( VariantContext vc : contexts ) { - if ( ALLOWED_VARIANT_CONTEXT_TYPES.contains(vc.getType()) ) { - Map attrs = new HashMap(vc.getAttributes()); - if ( rsID != null && !vc.hasID() ) { - attrs.put(VariantContext.ID_KEY, rsID); - vc = VariantContext.modifyAttributes(vc, attrs); - } - - // set the appropriate sample name if necessary - if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { - Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName); - Map genotypes = new HashMap(); - genotypes.put(sampleName, g); - vc = VariantContext.modifyGenotypes(vc, genotypes); - } - - // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding - if (fixReferenceBase) { - vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); - } - - writeRecord(vc, tracker, ref.getBase()); + Map attrs = new HashMap(vc.getAttributes()); + if ( rsID != null && !vc.hasID() ) { + attrs.put(VariantContext.ID_KEY, rsID); + vc = VariantContext.modifyAttributes(vc, attrs); } + + // set the appropriate sample name if necessary + if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { + Genotype g = Genotype.modifyName(vc.getGenotype(variants.getName()), sampleName); + Map genotypes = new HashMap(); + genotypes.put(sampleName, g); + vc = VariantContext.modifyGenotypes(vc, genotypes); + } + + if ( fixReferenceBase ) { + vc = VariantContext.modifyReferencePadding(vc, ref.getBase()); + } + + writeRecord(vc, tracker, ref.getLocus()); } return 1; } private Collection getVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref) { - // we need to special case the HapMap format because indels aren't handled correctly - List features = tracker.getValues(Feature.class, variants.getName()); - if ( features.size() > 0 && features.get(0) instanceof HapMapFeature ) { - ArrayList hapmapVCs = new ArrayList(features.size()); - for ( Object feature : features ) { - HapMapFeature hapmap = (HapMapFeature)feature; - Byte refBase = null; - // if it's an indel, we need to figure out the alleles - if ( hapmap.getAlleles()[0].equals("-") ) { - Map alleleMap = new HashMap(2); + List features = tracker.getValues(variants, ref.getLocus()); + List VCs = new ArrayList(features.size()); - // get the dbsnp object corresponding to this record, so we can learn whether this is an insertion or deletion - DbSNPFeature dbsnp = getDbsnpFeature(hapmap.getName()); - if ( dbsnp == null || dbsnp.getVariantType().equalsIgnoreCase("mixed") ) - continue; + for ( Feature record : features ) { + if ( VariantContextAdaptors.canBeConvertedToVariantContext(record) ) { + // we need to special case the HapMap format because indels aren't handled correctly + if ( record instanceof HapMapFeature) { - boolean isInsertion = dbsnp.getVariantType().equalsIgnoreCase("insertion"); + // is it an indel? + HapMapFeature hapmap = (HapMapFeature)record; + if ( hapmap.getAlleles()[0].equals(HapMapFeature.NULL_ALLELE_STRING) || hapmap.getAlleles()[1].equals(HapMapFeature.NULL_ALLELE_STRING) ) { + // get the dbsnp object corresponding to this record (needed to help us distinguish between insertions and deletions) + VariantContext dbsnpVC = getDbsnp(hapmap.getName()); + if ( dbsnpVC == null || dbsnpVC.isMixed() ) + continue; - alleleMap.put(HapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, isInsertion)); - alleleMap.put(HapMapFeature.INSERTION, Allele.create(hapmap.getAlleles()[1], !isInsertion)); - hapmap.setActualAlleles(alleleMap); + Map alleleMap = new HashMap(2); + alleleMap.put(HapMapFeature.DELETION, Allele.create(Allele.NULL_ALLELE_STRING, dbsnpVC.isInsertion())); + alleleMap.put(HapMapFeature.INSERTION, Allele.create(((HapMapFeature)record).getAlleles()[1], !dbsnpVC.isInsertion())); + hapmap.setActualAlleles(alleleMap); - // also, use the correct positioning for insertions - if ( isInsertion ) - hapmap.updatePosition(dbsnp.getStart()); - else - hapmap.updatePosition(dbsnp.getStart() - 1); + // also, use the correct positioning for insertions + hapmap.updatePosition(dbsnpVC.getStart()); - if ( hapmap.getStart() < ref.getWindow().getStart() ) { - logger.warn("Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping..."); - continue; + if ( hapmap.getStart() < ref.getWindow().getStart() ) { + logger.warn("Hapmap record at " + ref.getLocus() + " represents an indel too large to be converted; skipping..."); + continue; + } } - refBase = ref.getBases()[hapmap.getStart() - ref.getWindow().getStart()]; - } - VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), hapmap, ref); - if ( vc != null ) { - if ( refBase != null ) { - // TODO -- fix me - //Map attrs = new HashMap(vc.getAttributes()); - //attrs.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, refBase); - //vc = VariantContext.modifyAttributes(vc, attrs); - } - hapmapVCs.add(vc); } + + // ok, we might actually be able to turn this record in a variant context + VariantContext vc = VariantContextAdaptors.toVariantContext(variants.getName(), record, ref); + + if ( vc != null ) // sometimes the track has odd stuff in it that can't be converted + VCs.add(vc); } - return hapmapVCs; } - // for everything else, we can just convert to VariantContext - return tracker.getValues(variants, ref.getLocus()); + return VCs; } - private DbSNPFeature getDbsnpFeature(String rsID) { + private VariantContext getDbsnp(String rsID) { if ( dbsnpIterator == null ) { - ReferenceOrderedDataSource dbsnpDataSource = null; - for ( ReferenceOrderedDataSource ds : getToolkit().getRodDataSources() ) { - if ( ds.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - dbsnpDataSource = ds; - break; - } - } - if ( dbsnpDataSource == null ) + if ( dbsnp == null ) throw new UserException.BadInput("No dbSNP rod was provided, but one is needed to decipher the correct indel alleles from the HapMap records"); RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe); - dbsnpIterator = builder.createInstanceOfTrack(DbSNPCodec.class, dbsnpDataSource.getFile()).getIterator(); + dbsnpIterator = builder.createInstanceOfTrack(VCFCodec.class, new File(dbsnp.getSource())).getIterator(); // Note that we should really use some sort of seekable iterator here so that the search doesn't take forever // (but it's complicated because the hapmap location doesn't match the dbsnp location, so we don't know where to seek to) } while ( dbsnpIterator.hasNext() ) { GATKFeature feature = dbsnpIterator.next(); - DbSNPFeature dbsnp = (DbSNPFeature)feature.getUnderlyingObject(); - if ( dbsnp.getRsID().equals(rsID) ) - return dbsnp; + VariantContext vc = (VariantContext)feature.getUnderlyingObject(); + if ( vc.hasID() && vc.getID().equals(rsID) ) + return vc; } return null; } - private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, byte ref) { + private void writeRecord(VariantContext vc, RefMetaDataTracker tracker, GenomeLoc loc) { if ( !wroteHeader ) { wroteHeader = true; // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName()))); //hInfo.add(new VCFHeaderLine("source", "VariantsToVCF")); //hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName())); @@ -232,13 +210,13 @@ public class VariantsToVCF extends RodWalker { samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); if ( samples.isEmpty() ) { - List rods = tracker.getValues(Feature.class, variants.getName()); - if ( rods.size() == 0 ) - throw new IllegalStateException("No rod data is present"); + List features = tracker.getValues(variants, loc); + if ( features.size() == 0 ) + throw new IllegalStateException("No rod data is present, but we just created a VariantContext"); - Object rod = rods.get(0); - if ( rod instanceof HapMapFeature) - samples.addAll(Arrays.asList(((HapMapFeature)rod).getSampleIDs())); + Feature f = features.get(0); + if ( f instanceof HapMapFeature ) + samples.addAll(Arrays.asList(((HapMapFeature)f).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java index 7a47a4b8d..6a10d0203 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/HapMapFeature.java @@ -37,6 +37,7 @@ import java.util.Map; */ public class HapMapFeature implements Feature { + public static final String NULL_ALLELE_STRING = "-"; public static final String INSERTION = "I"; public static final String DELETION = "D"; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index fff1961c6..23478cc2b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -396,6 +396,10 @@ public class VariantContext implements Feature { // to enable tribble intergrati return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes, vc.getReferenceBaseForIndel(), true); } + public static VariantContext modifyReferencePadding(VariantContext vc, Byte b) { + return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes(), b, true); + } + public static VariantContext modifyPErrorFiltersAndAttributes(VariantContext vc, double negLog10PError, Set filters, Map attributes) { return new VariantContext(vc.getSource(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.genotypes, negLog10PError, filters, attributes, vc.getReferenceBaseForIndel(), true); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index 51859df53..f65ba2cf0 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -15,73 +15,90 @@ import java.util.ArrayList; * test(s) for the VariantsToVCF walker. */ public class VariantsToVCFIntegrationTest extends WalkerTest { - // TODO -- eric, fix me -// @Test -// public void testVariantsToVCFUsingGeliInput() { -// List md5 = new ArrayList(); -// md5.add("4accae035d271b35ee2ec58f403c68c6"); -// -// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( -// "-R " + b36KGReference + -// " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + -// " -T VariantsToVCF" + -// " -L 1:10,000,000-11,000,000" + -// " -sample NA123AB" + -// " -o %s" + -// " -NO_HEADER", -// 1, // just one output file -// md5); -// executeTest("testVariantsToVCFUsingGeliInput #1", spec).getFirst(); -// } -// -// @Test -// public void testGenotypesToVCFUsingGeliInput() { -// List md5 = new ArrayList(); -// md5.add("71e8c98d7c3a73b6287ecc339086fe03"); -// -// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( -// "-R " + b36KGReference + -// " -B:variant,GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + -// " -T VariantsToVCF" + -// " -L 1:10,000,000-11,000,000" + -// " -sample NA123AB" + -// " -o %s" + -// " -NO_HEADER", -// 1, // just one output file -// md5); -// executeTest("testVariantsToVCFUsingGeliInput #2", spec).getFirst(); -// } -// -// @Test -// public void testGenotypesToVCFUsingHapMapInput() { -// List md5 = new ArrayList(); -// md5.add("f343085305e80c7a2493422e4eaad983"); -// -// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( -// "-R " + b36KGReference + -// " -B:variant,HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" + -// " -T VariantsToVCF" + -// " -L 1:1-1,000,000" + -// " -o %s" + -// " -NO_HEADER", -// 1, // just one output file -// md5); -// executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst(); -// } -// -// @Test -// public void testGenotypesToVCFUsingVCFInput() { -// List md5 = new ArrayList(); -// md5.add("86f02e2e764ba35854cff2aa05a1fdd8"); -// -// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( -// "-R " + b36KGReference + -// " -B:variant,VCF " + validationDataLocation + "complexExample.vcf4" + -// " -T VariantsToVCF" + -// " -o %s" + -// " -NO_HEADER", -// 1, // just one output file -// md5); -// executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst(); -// } + + @Test + public void testVariantsToVCFUsingDbsnpInput() { + List md5 = new ArrayList(); + md5.add("d64942fed2a5b7b407f9537dd2b4832e"); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant:dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -T VariantsToVCF" + + " -L 1:1-30,000,000" + + " -o %s" + + " -NO_HEADER", + 1, // just one output file + md5); + executeTest("testVariantsToVCFUsingDbsnpInput", spec).getFirst(); + } + + @Test + public void testVariantsToVCFUsingGeliInput() { + List md5 = new ArrayList(); + md5.add("4accae035d271b35ee2ec58f403c68c6"); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant:GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.variants.geli.calls" + + " -T VariantsToVCF" + + " -L 1:10,000,000-11,000,000" + + " -sample NA123AB" + + " -o %s" + + " -NO_HEADER", + 1, // just one output file + md5); + executeTest("testVariantsToVCFUsingGeliInput - calls", spec).getFirst(); + } + + @Test + public void testGenotypesToVCFUsingGeliInput() { + List md5 = new ArrayList(); + md5.add("2413f036ec4100b8d5db179946159a82"); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant:GeliText " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.lod5.genotypes.geli.calls" + + " -T VariantsToVCF" + + " -L 1:10,100,000-10,200,000" + + " -sample NA123AB" + + " -o %s" + + " -NO_HEADER", + 1, // just one output file + md5); + executeTest("testVariantsToVCFUsingGeliInput - genotypes", spec).getFirst(); + } + + @Test + public void testGenotypesToVCFUsingHapMapInput() { + List md5 = new ArrayList(); + md5.add("f343085305e80c7a2493422e4eaad983"); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant:HapMap " + validationDataLocation + "rawHapMap.yri.chr1.txt" + + " -T VariantsToVCF" + + " -L 1:1-1,000,000" + + " -o %s" + + " -NO_HEADER", + 1, // just one output file + md5); + executeTest("testVariantsToVCFUsingHapMapInput", spec).getFirst(); + } + + @Test + public void testGenotypesToVCFUsingVCFInput() { + List md5 = new ArrayList(); + md5.add("86f02e2e764ba35854cff2aa05a1fdd8"); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --variant:VCF " + validationDataLocation + "complexExample.vcf4" + + " -T VariantsToVCF" + + " -o %s" + + " -NO_HEADER", + 1, // just one output file + md5); + executeTest("testVariantsToVCFUsingVCFInput", spec).getFirst(); + } } From 1e490e0dec3c4bcc154b5589ba587c8ebbe34771 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 09:26:06 -0400 Subject: [PATCH 156/186] Bringing up to speed with new syntax --- .../walkers/genotyper/UnifiedGenotyperPerformanceTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index d271d78b1..4405f61d7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -B:dbsnp,VCF " + b36dbSNP129 + + " -dbsnp,VCF " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -B:dbsnp,vcf " + b36dbSNP129 + + " -dbsnp,vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -L chr1:1-50,000,000" + " -nt 10" + - " -B:dbsnp,vcf " + b36dbSNP129 + + " -dbsnp,vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); From 7afb5c9f1c33b42e98df83ffcdcdcfdd11c7483e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 10:11:37 -0400 Subject: [PATCH 157/186] More updates to be consistent with the new rod syntax. --- .../fasta/FastaAlternateReferenceWalker.java | 6 ++++++ .../FastaAlternateReferenceIntegrationTest.java | 13 ++++++------- .../genotyper/UnifiedGenotyperPerformanceTest.java | 6 +++--- .../indels/IndelRealignerPerformanceTest.java | 4 ++-- .../variantutils/VariantsToVCFIntegrationTest.java | 2 +- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 1f214fa62..a509fec0f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -35,6 +35,8 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.util.List; + /** * Generates an alternative reference sequence over the specified interval. Given variant ROD tracks, @@ -45,6 +47,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Reference(window=@Window(start=-1,stop=50)) @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { + + @Input(fullName = "variants", shortName = "V", doc="variants to model", required=false) + public List> variants; + @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) public RodBinding snpmask = RodBinding.makeUnbound(VariantContext.class); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index be2f3cdaa..9af39e92c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -24,16 +24,15 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaReference", spec1b); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("0567b32ebdc26604ddf2a390de4579ac")); executeTest("testFastaAlternateReferenceIndels", spec2); - // TODO : Eric, update with new DBSNP -// WalkerTestSpec spec3 = new WalkerTestSpec( -// "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -B:snps,GeliText " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.geli.calls -B:snpmask,dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", -// 1, -// Arrays.asList("82705a88f6fc25880dd2331183531d9a")); -// executeTest("testFastaAlternateReferenceSnps", spec3); + WalkerTestSpec spec3 = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500;1:10,029,200-10,029,500 -o %s", + 1, + Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641")); + executeTest("testFastaAlternateReferenceSnps", spec3); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index 4405f61d7..86f8b14f1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -dbsnp,VCF " + b36dbSNP129 + + " -dbsnp:VCF " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -dbsnp,vcf " + b36dbSNP129 + + " -dbsnp:vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -L chr1:1-50,000,000" + " -nt 10" + - " -dbsnp,vcf " + b36dbSNP129 + + " -dbsnp:vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java index e8b5033cf..200b9b5a7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java @@ -30,7 +30,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -dbsnp:vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-5,650,000" + @@ -45,7 +45,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -dbsnp:vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L chr1:1-150,000,000" + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java index f65ba2cf0..df247aed5 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCFIntegrationTest.java @@ -23,7 +23,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --variant:dbsnp " + GATKDataLocation + "dbsnp_129_b36.rod" + + " --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + " -T VariantsToVCF" + " -L 1:1-30,000,000" + " -o %s" + From ec76bf6d4ac1e8887bfafc461cbb3bd538f4ece5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 11:24:48 -0400 Subject: [PATCH 158/186] VCF headers now include 'contig' lines describing the name, length, and assembly (when easily parsable) for each contig in the reference. --- .../sting/gatk/io/stubs/VCFWriterStub.java | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 7a110fde5..6ade8e78c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; @@ -177,14 +178,21 @@ public class VCFWriterStub implements Stub, VCFWriter { vcfHeader = header; // Check for the command-line argument header line. If not present, add it in. - VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); - boolean foundCommandLineHeaderLine = false; - for(VCFHeaderLine line: vcfHeader.getMetaData()) { - if(line.getKey().equals(commandLineArgHeaderLine.getKey())) - foundCommandLineHeaderLine = true; + if ( !skipWritingHeader ) { + VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); + boolean foundCommandLineHeaderLine = false; + for (VCFHeaderLine line: vcfHeader.getMetaData()) { + if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) ) + foundCommandLineHeaderLine = true; + } + if ( !foundCommandLineHeaderLine ) + vcfHeader.addMetaDataLine(commandLineArgHeaderLine); + + // also put in the reference contig header lines + String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName()); + for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() ) + vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly)); } - if(!foundCommandLineHeaderLine && !skipWritingHeader) - vcfHeader.addMetaDataLine(commandLineArgHeaderLine); outputTracker.getStorage(this).writeHeader(vcfHeader); } @@ -220,4 +228,27 @@ public class VCFWriterStub implements Stub, VCFWriter { CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class); return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\""); } + + private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) { + String val; + if ( assembly != null ) + val = String.format("", contig.getSequenceName(), contig.getSequenceLength(), assembly); + else + val = String.format("", contig.getSequenceName(), contig.getSequenceLength()); + return new VCFHeaderLine("contig", val); + } + + private String getReferenceAssembly(String refPath) { + // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot + String assembly = null; + if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 ) + assembly = "b37"; + else if ( refPath.indexOf("b36") != -1 ) + assembly = "b36"; + else if ( refPath.indexOf("hg18") != -1 ) + assembly = "hg18"; + else if ( refPath.indexOf("hg19") != -1 ) + assembly = "hg19"; + return assembly; + } } \ No newline at end of file From 78aa6db076b69a1ad0ebec9bb11b2c2337322c53 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 11:45:54 -0400 Subject: [PATCH 159/186] added the 'reference' header line too. We are now header-compliant for vcf4.1. --- .../org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 6ade8e78c..936243f9d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -192,6 +192,8 @@ public class VCFWriterStub implements Stub, VCFWriter { String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName()); for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() ) vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly)); + + vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath())); } outputTracker.getStorage(this).writeHeader(vcfHeader); From b20c4d5286218ebccca790faa7a908235b3b9558 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 12:04:55 -0400 Subject: [PATCH 160/186] Thanks to Mark for agreeing to transition from 'variants' back to 'variant'. I think I got them all but I've been jumping all around the code, so there might be a straggler or two. --- .../walkers/annotator/VariantAnnotator.java | 3 +- .../beagle/ProduceBeagleInputWalker.java | 2 +- .../fasta/FastaAlternateReferenceWalker.java | 2 +- .../filters/VariantFiltrationWalker.java | 3 +- .../phasing/ReadBackedPhasingWalker.java | 2 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/LeftAlignVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 2 +- .../walkers/variantutils/SelectVariants.java | 2 +- .../variantutils/ValidateVariants.java | 3 +- .../walkers/variantutils/VariantsToTable.java | 3 +- .../gatk/EngineFeaturesIntegrationTest.java | 2 +- .../VariantAnnotatorIntegrationTest.java | 30 +++++++++---------- .../walkers/beagle/BeagleIntegrationTest.java | 8 ++--- .../VariantFiltrationIntegrationTest.java | 22 +++++++------- .../ReadBackedPhasingIntegrationTest.java | 2 +- .../DictionaryConsistencyIntegrationTest.java | 2 +- .../LeftAlignVariantsIntegrationTest.java | 2 +- .../LiftoverVariantsIntegrationTest.java | 6 ++-- .../SelectVariantsIntegrationTest.java | 8 ++--- .../VCFStreamingIntegrationTest.java | 4 +-- .../ValidateVariantsIntegrationTest.java | 2 +- .../VariantsToTableIntegrationTest.java | 2 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 4 +-- 24 files changed, 62 insertions(+), 58 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index ef408fae3..ec7d51043 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -53,7 +53,8 @@ import java.util.*; @Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) public class VariantAnnotator extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 5bca61873..88b976e7e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -52,7 +52,7 @@ import java.util.*; */ @Requires(value={}) public class ProduceBeagleInputWalker extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index a509fec0f..93012ee10 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -48,7 +48,7 @@ import java.util.List; @Requires(value={DataSource.REFERENCE}) public class FastaAlternateReferenceWalker extends FastaReferenceWalker { - @Input(fullName = "variants", shortName = "V", doc="variants to model", required=false) + @Input(fullName = "variant", shortName = "V", doc="variants to model", required=false) public List> variants; @Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 0daabfa45..e0bd58aea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -52,7 +52,8 @@ import java.util.*; @Requires(value={}) @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltrationWalker extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Input(fullName="mask", doc="Input ROD mask", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 7df55b4cd..f81dec2ac 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -67,7 +67,7 @@ public class ReadBackedPhasingWalker extends RodWalker variants; @Output(doc = "File to which variants should be written", required = true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 7905c2c32..f24d7a211 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -64,7 +64,7 @@ public class CombineVariants extends RodWalker { * are techincally order dependent. It is strongly recommended to provide explicit names when * a rod priority list is provided. */ - @Input(fullName = "variants", shortName = "V", doc="The VCF files to merge together", required=true) + @Input(fullName = "variant", shortName = "V", doc="The VCF files to merge together", required=true) public List> variantsToMerge; @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index d8bcb252d..52b633cdf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -50,7 +50,7 @@ import java.util.*; @Reference(window=@Window(start=-200,stop=200)) @Requires(value={}) public class LeftAlignVariants extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index f3f0085f9..7edb4d52c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -52,7 +52,7 @@ import java.util.*; */ @Requires(value={}) public class LiftoverVariants extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index ce885efc0..7179956c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -61,7 +61,7 @@ public class SelectVariants extends RodWalker { * Variants from this file are sent through the filtering and modifying routines as directed * by the arguments to SelectVariants, and finally are emitted. */ - @Input(fullName="variants", shortName = "V", doc="Select variants from this VCF file", required=true) + @Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true) public RodBinding variants; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index a0986143c..9f64e0ca4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -55,7 +55,8 @@ import java.util.Set; @Reference(window=@Window(start=0,stop=100)) @Requires(value={}) public class ValidateVariants extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; public enum ValidationType { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 8c1c33f1b..f999f25c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -47,7 +47,8 @@ import java.util.*; */ @Requires(value={}) public class VariantsToTable extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Output(doc="File to which results should be written",required=true) diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index 80a8d2fa4..5b5083ef3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -33,7 +33,7 @@ import org.testng.annotations.Test; */ public class EngineFeaturesIntegrationTest extends WalkerTest { private void testBadRODBindingInput(String type, String name, Class c) { - WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variants:variants," + type + " " + WalkerTestSpec spec = new WalkerTestSpec("-T SelectVariants -L 1:1 --variant:variant," + type + " " + b37dbSNP132 + " -R " + b37KGReference + " -o %s", 1, c); executeTest(name, spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index cbfb3cd0b..492651635 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -14,7 +14,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -22,7 +22,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("964f1016ec9a3c55333f62dd834c14d6")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -30,7 +30,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("8e7de435105499cd71ffc099e268a83e")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -38,7 +38,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("64b6804cb1e27826e3a47089349be581")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -46,7 +46,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("42ccee09fa9f8c58f4a0d4f1139c094f")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -54,7 +54,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("f2ddfa8105c290b1f34b7a261a02a1ac")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -62,7 +62,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, Arrays.asList("fd1ffb669800c2e07df1e2719aa38e49")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -70,7 +70,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, + baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, Arrays.asList("09f8e840770a9411ff77508e0ed0837f")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } @@ -78,7 +78,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testOverwritingHeader() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variants:VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, + baseTestString() + " -G \"Standard\" --variant:VCF " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1, Arrays.asList("78d2c19f8107d865970dbaf3e12edd92")); executeTest("test overwriting header", spec); } @@ -86,7 +86,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("16e3a1403fc376320d7c69492cad9345")); executeTest("not passing it any reads", spec); } @@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:dbsnp,vcf " + b36dbSNP129 + " -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:dbsnp,vcf " + b36dbSNP129 + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } @@ -102,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -110,7 +110,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variants:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, + baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } @@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -A HomopolymerRun --variants:VCF " + validationDataLocation + "/" + file + " -BTI variants -NO_HEADER", 1, + baseTestString() + " -A HomopolymerRun --variant:VCF " + validationDataLocation + "/" + file + " -BTI variants -NO_HEADER", 1, Arrays.asList(MD5)); executeTest("Testing lookup vcf tabix vs. vcf tribble", spec); } @@ -129,7 +129,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variants:VCF " + + "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant:VCF " + validationDataLocation + "1000G.exomes.vcf --snpEffFile:SnpEff " + validationDataLocation + "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000", 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index d6ff95539..5f759fdbf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -37,7 +37,7 @@ public class BeagleIntegrationTest extends WalkerTest { public void testBeagleOutput() { WalkerTestSpec spec = new WalkerTestSpec( "-T BeagleOutputToVCF -R " + hg19Reference + " " + - "--variants:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + + "--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + "--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + "--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + @@ -49,7 +49,7 @@ public class BeagleIntegrationTest extends WalkerTest { public void testBeagleInput() { WalkerTestSpec spec = new WalkerTestSpec( "-T ProduceBeagleInput -R " + hg19Reference + " " + - "--variants:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + + "--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " + "-o %s", 1, Arrays.asList("a01c704246f3dd1b9c65774007e51e69")); executeTest("test BeagleInput", spec); } @@ -57,7 +57,7 @@ public class BeagleIntegrationTest extends WalkerTest { @Test public void testBeagleInput2() { WalkerTestSpec spec = new WalkerTestSpec( - "-T ProduceBeagleInput --variants:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ + "-T ProduceBeagleInput --variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_HSQ_chr22_14-16m.vcf "+ "--validation:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/NA12878_OMNI_chr22_14-16m.vcf "+ "-L 22:14000000-16000000 -o %s -bvcf %s -bs 0.8 -valp 0.98 -R /humgen/1kg/reference/human_g1k_v37.fasta -NO_HEADER ",2, Arrays.asList("660986891b30cdc937e0f2a3a5743faa","e96ddd51da9f4a797b2aa8c20e404166")); @@ -68,7 +68,7 @@ public class BeagleIntegrationTest extends WalkerTest { public void testBeagleOutput2() { WalkerTestSpec spec = new WalkerTestSpec( "-T BeagleOutputToVCF -R "+hg19Reference+" "+ - "--variants:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.vcf "+ + "--variant:VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.vcf "+ "--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index f613407e2..1cb43ceb1 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -15,7 +15,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("8a105fa5eebdfffe7326bc5b3d8ffd1c")); executeTest("test no action", spec); } @@ -23,7 +23,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -window 10 --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -window 10 --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("27b13f179bb4920615dff3a32730d845")); executeTest("test clustered SNPs", spec); } @@ -31,17 +31,17 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testMasks() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -maskName foo --mask:VCF3 " + validationDataLocation + "vcfexample2.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("578f9e774784c25871678e6464fd212b")); executeTest("test mask all", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -maskName foo --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("bfa86a674aefca1b13d341cb14ab3c4f")); executeTest("test mask some", spec2); WalkerTestSpec spec3 = new WalkerTestSpec( - baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -maskName foo -maskExtend 10 --mask:VCF " + validationDataLocation + "vcfMask.vcf --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("5939f80d14b32d88587373532d7b90e5")); executeTest("test mask extend", spec3); } @@ -49,7 +49,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("45219dbcfb6f81bba2ea0c35f5bfd368")); executeTest("test filter #1", spec); } @@ -57,7 +57,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("c95845e817da7352b9b72bc9794f18fb")); executeTest("test filter #2", spec); } @@ -65,7 +65,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("b8cdd7f44ff1a395e0a9b06a87e1e530")); executeTest("test filter with separate names #2", spec); } @@ -73,12 +73,12 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testGenotypeFilters() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("96b61e4543a73fe725e433f007260039")); executeTest("test genotype filter #1", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variants:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("6c8112ab17ce39c8022c891ae73bf38e")); executeTest("test genotype filter #2", spec2); } @@ -86,7 +86,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { @Test public void testDeletions() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variants:VCF " + validationDataLocation + "twoDeletions.vcf", 1, + baseTestString() + " --filterExpression 'QUAL < 100' --filterName foo --variant:VCF " + validationDataLocation + "twoDeletions.vcf", 1, Arrays.asList("569546fd798afa0e65c5b61b440d07ac")); executeTest("test deletions", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java index 3566ecd05..e1d22f107 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingIntegrationTest.java @@ -11,7 +11,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest { return "-T ReadBackedPhasing" + " -R " + reference + " -I " + validationDataLocation + reads + - " --variants " + validationDataLocation + VCF + + " --variant " + validationDataLocation + VCF + " --cacheWindowSize " + cacheWindowSize + " --maxPhaseSites " + maxPhaseSites + " --phaseQualityThresh " + phaseQualityThresh + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java index 1392f136a..4be848164 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/qc/DictionaryConsistencyIntegrationTest.java @@ -56,7 +56,7 @@ public class DictionaryConsistencyIntegrationTest extends WalkerTest { } private WalkerTest.WalkerTestSpec testVCF(String ref, String vcf, Class c) { - return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variants:vcf " + return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf " + vcf + " -F POS,CHROM -R " + ref + " -o %s", 1, c); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java index 2f77a8f55..2139a53e7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariantsIntegrationTest.java @@ -38,7 +38,7 @@ public class LeftAlignVariantsIntegrationTest extends WalkerTest { @Test public void testLeftAlignment() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variants:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", + "-T LeftAlignVariants -o %s -R " + b37KGReference + " --variant:vcf " + validationDataLocation + "forLeftAlignVariantsTest.vcf -NO_HEADER", 1, Arrays.asList("158b1d71b28c52e2789f164500b53732")); executeTest("test left alignment", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java index c3795f98e..d10bb4452 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariantsIntegrationTest.java @@ -38,7 +38,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testb36Tohg19() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " --variants:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("70aeaca5b74cc7ba8e2da7b71ff0fbfd")); executeTest("test b36 to hg19", spec); @@ -47,7 +47,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testb36Tohg19UnsortedSamples() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + b36KGReference + " --variants:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + b36KGReference + " --variant:vcf3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.noheader.unsortedSamples.vcf -chain " + validationDataLocation + "b36ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("3fd7ec2dc4064ef410786276b0dc9d08")); executeTest("test b36 to hg19, unsorted samples", spec); @@ -56,7 +56,7 @@ public class LiftoverVariantsIntegrationTest extends WalkerTest { @Test public void testhg18Tohg19Unsorted() { WalkerTestSpec spec = new WalkerTestSpec( - "-T LiftoverVariants -o %s -R " + hg18Reference + " --variants:vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", + "-T LiftoverVariants -o %s -R " + hg18Reference + " --variant:vcf " + validationDataLocation + "liftover_test.vcf -chain " + validationDataLocation + "hg18ToHg19.broad.over.chain -dict /seq/references/Homo_sapiens_assembly19/v0/Homo_sapiens_assembly19.dict", 1, Arrays.asList("ab2c6254225d7e2ecf52eee604d5673b")); executeTest("test hg18 to hg19, unsorted", spec); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 7332c40cb..b2ac3f4a6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variants:VCF3 " + testfile + " -NO_HEADER"), + baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile + " -NO_HEADER"), 1, Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); @@ -29,7 +29,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testfile = validationDataLocation + "test.dup.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -sn B -sn C --variants:VCF3 " + testfile + " -NO_HEADER"), + baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile + " -NO_HEADER"), 1, Arrays.asList("b74038779fe6485dbb8734ae48178356") ); @@ -42,7 +42,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variants:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e") ); @@ -55,7 +55,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variants:VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant:VCF " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index 458233b09..ec3d1f580 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -56,7 +56,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants" + " -R " + b36KGReference + - " --variants:vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() + + " --variant:vcf3,storage=STREAM " + tmpFifo.getAbsolutePath() + " --NO_HEADER" + " -o %s", 1, @@ -80,7 +80,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { WalkerTestSpec selectTestSpec = new WalkerTestSpec( "-T SelectVariants" + " -R " + b36KGReference + - " --variants:vcf3,storage=STREAM " + testFile + + " --variant:vcf3,storage=STREAM " + testFile + " --NO_HEADER" + " -select 'QD > 2.0'" + " -o " + tmpFifo.getAbsolutePath(), diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 73a389ba6..4d5f0359d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -34,7 +34,7 @@ import java.util.Arrays; public class ValidateVariantsIntegrationTest extends WalkerTest { public static String baseTestString(String file, String type) { - return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variants:vcf " + validationDataLocation + file + " --validationType " + type; + return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + validationDataLocation + file + " --validationType " + type; } @Test diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java index 96cece3ca..19021c1c2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -35,7 +35,7 @@ import java.io.File; public class VariantsToTableIntegrationTest extends WalkerTest { private String variantsToTableCmd(String moreArgs) { return "-R " + hg18Reference + - " --variants:vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" + + " --variant:vcf " + validationDataLocation + "/soap_gatk_annotated.vcf" + " -T VariantsToTable" + " -F CHROM -F POS -F ID -F REF -F ALT -F QUAL -F FILTER -F TRANSITION -F DP -F SB -F set -F RankSumP -F refseq.functionalClass*" + " -L chr1 -KMA -o %s" + moreArgs; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index e758ce0a2..2ef116708 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -17,11 +17,11 @@ public class VCFIntegrationTest extends WalkerTest { String baseCommand = "-R " + b37KGReference + " -NO_HEADER -o %s "; - String test1 = baseCommand + "-T VariantAnnotator --variants " + testVCF + " -BTI variants"; + String test1 = baseCommand + "-T VariantAnnotator --variant " + testVCF + " -BTI variant"; WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(md5ofInputVCF)); List result = executeTest("Test Variant Annotator with no changes", spec1).getFirst(); - String test2 = baseCommand + "-T VariantsToVCF --variants " + result.get(0).getAbsolutePath(); + String test2 = baseCommand + "-T VariantsToVCF --variant " + result.get(0).getAbsolutePath(); WalkerTestSpec spec2 = new WalkerTestSpec(test2, 1, Arrays.asList(md5ofInputVCF)); executeTest("Test Variants To VCF from new output", spec2); } From 5a3c99b7b9508e277cd14bfa46ef4dbf2fa2134a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 12:30:46 -0400 Subject: [PATCH 162/186] Fixing 'variants' change in qscript --- .../sting/queue/extensions/gatk/VcfGatherFunction.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index fcb3e690a..f8a6cb2cb 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -43,7 +43,7 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { this.intervals = this.originalGATK.intervals this.intervalsString = this.originalGATK.intervalsString - this.variants = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } + this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } this.rod_priority_list = (0 until this.gatherParts.size).map("input"+_).mkString(",") this.out = this.originalOutput this.assumeIdenticalSamples = true From 489e5cffc17f7aa00584a1fa6329bcfc354f69c7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 14:29:15 -0400 Subject: [PATCH 163/186] Missed a few 'variants' --- .../gatk/walkers/beagle/BeagleOutputToVCFWalker.java | 2 +- .../annotator/VariantAnnotatorIntegrationTest.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 1c155e786..a34dfd080 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -54,7 +54,7 @@ import static java.lang.Math.log10; */ @Requires(value={}) public class BeagleOutputToVCFWalker extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 492651635..596ac5c36 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -86,7 +86,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testNoReads() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, Arrays.asList("16e3a1403fc376320d7c69492cad9345")); executeTest("not passing it any reads", spec); } @@ -94,7 +94,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithDbsnp() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:dbsnp,vcf " + b36dbSNP129 + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:dbsnp,vcf " + b36dbSNP129 + " -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d")); executeTest("getting DB tag with dbSNP", spec); } @@ -102,7 +102,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testDBTagWithHapMap() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variants", 1, + baseTestString() + " -B:compH3,VCF " + validationDataLocation + "fakeHM3.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1, Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688")); executeTest("getting DB tag with HM3", spec); } @@ -110,7 +110,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testUsingExpression() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variants", 1, + baseTestString() + " -B:foo,VCF " + validationDataLocation + "targetAnnotations.vcf -G \"Standard\" --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -BTI variant", 1, Arrays.asList("e9c0d832dc6b4ed06c955060f830c140")); executeTest("using expression", spec); } @@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { final String MD5 = "13269d5a2e16f06fd755cc0fb9271acf"; for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString() + " -A HomopolymerRun --variant:VCF " + validationDataLocation + "/" + file + " -BTI variants -NO_HEADER", 1, + baseTestString() + " -A HomopolymerRun --variant:VCF " + validationDataLocation + "/" + file + " -BTI variant -NO_HEADER", 1, Arrays.asList(MD5)); executeTest("Testing lookup vcf tabix vs. vcf tribble", spec); } From bd1cf4c7bcde4f15f632b360a33ab9148739090b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 8 Aug 2011 15:09:12 -0400 Subject: [PATCH 164/186] Pacbio Pipeline Added the base quality "filling" step to allow the pipeline to handle raw pacbio BAM files. This is the first step towards a generic pacbio data processing pipeline. --- .../qscripts/RecalibrateBaseQualities.scala | 102 ++++++++++++++---- 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index cbe53db8d..469325f6d 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -3,6 +3,8 @@ package org.broadinstitute.sting.queue.qscripts import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.util.QScriptUtils +import net.sf.samtools.SAMFileHeader.SortOrder +import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups} /** * Created by IntelliJ IDEA. @@ -14,52 +16,116 @@ import org.broadinstitute.sting.queue.util.QScriptUtils class RecalibrateBaseQualities extends QScript { - @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true) - var GATKjar: File = _ - - @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true) + @Input(doc="input FASTA file, BAM file - or list of FASTA/BAM files. ", shortName="i", required=true) var input: File = _ @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true) var R: String = _ @Input(doc="Reference fasta file", shortName="R", required=true) - var reference: File = _ // new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + var reference: File = _ + + @Input(doc="dbsnp VCF file to use ", shortName="D", required=true) + var dbSNP: File = _ + + @Input(doc="Default base qualities. Overrides the file's original base qualities with given value. Must be used if the file does not have base qualities." , shortName = "dbq", required=false) + var dbq: Int = -1 + + @Input(doc="Number of jobs to scatter/gather. Default is the number of contigs in the dataset" , shortName = "sg", required=false) + var threads: Int = -1 + + @Input(doc="Sample Name" , shortName = "sn", required=false) + var sample: String = "" + + @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) + var bwaPath: File = _ + + + - @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=true) - var dbSNP: File = _ // new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") val queueLogDir: String = ".qlog/" var nContigs: Int = 0 + var ADD_BASE_QUALITIES = false def script = { - val bamList = QScriptUtils.createListFromFile(input) - nContigs = QScriptUtils.getNumberOfContigs(bamList(0)) + if (dbq >= 0) + ADD_BASE_QUALITIES = true - for (bam <- bamList) { + val fileList = QScriptUtils.createListFromFile(input) + nContigs = if (threads >= 0) {threads} else {QScriptUtils.getNumberOfContigs(fileList(0))} - val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv") - val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv") - val recalBam: File = swapExt(bam, ".bam", ".recal.bam") + for (file <- fileList) { + val qualBam: File = swapExt(file, ".bam", ".quals.bam") + val rgBam: File = if (ADD_BASE_QUALITIES) {swapExt(file, ".bam", ".rg.bam")} else {file} + val recalFile1: File = swapExt(file, ".bam", ".recal1.csv") + val recalFile2: File = swapExt(file, ".bam", ".recal2.csv") + val recalBam: File = swapExt(file, ".bam", ".recal.bam") val path1: String = recalBam + ".before" val path2: String = recalBam + ".after" - add(cov(bam, recalFile1), - recal(bam, recalFile1, recalBam), + + if (ADD_BASE_QUALITIES) { + add(addQuals(file, qualBam, dbq), + addReadGroup(qualBam, rgBam, sample)) + } + + add(cov(rgBam, recalFile1), + recal(rgBam, recalFile1, recalBam), cov(recalBam, recalFile2), analyzeCovariates(recalFile1, path1), analyzeCovariates(recalFile2, path2)) } } - trait CommandLineGATKArgs extends CommandLineGATK { - this.jarFile = GATKjar - this.reference_sequence = reference + + // General arguments to non-GATK tools + trait ExternalCommonArgs extends CommandLineFunction { this.memoryLimit = 4 this.isIntermediate = true } + trait CommandLineGATKArgs extends CommandLineGATK { + this.reference_sequence = reference + } + + + case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs { + def commandLine = bwaPath + " bwasw " + reference + " " + inFastq + " > " + outSam + this.analysisName = queueLogDir + outSam + ".bwa_sam_se" + this.jobName = queueLogDir + outSam + ".bwa_sam_se" + } + + case class sortSam (@Input inSam: File, @Output outBam: File) extends SortSam with ExternalCommonArgs { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") + this.input = List(inSam) + this.output = outBam + this.sortOrder = SortOrder.coordinate + this.analysisName = queueLogDir + outBam + ".sortSam" + this.jobName = queueLogDir + outBam + ".sortSam" + } + + case class addQuals(inBam: File, outBam: File, qual: Int) extends PrintReads with CommandLineGATKArgs { + this.input_file :+= inBam + this.out = outBam + this.DBQ = qual + } + + case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups { + @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") + this.input = List(inBam) + this.output = outBam + this.RGID = "1" + this.RGCN = "BI" + this.RGPL = "PacBio_RS" + this.RGSM = sample + this.RGLB = "default_library" + this.RGPU = "default_pu" + this.analysisName = queueLogDir + outBam + ".rg" + this.jobName = queueLogDir + outBam + ".rg" + } + case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") From 22d25638234c77d33080517768a3066044143c81 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 8 Aug 2011 16:02:28 -0400 Subject: [PATCH 165/186] added BWA SW alignment The pipeline now accepts fasta/fastq files and aligns them using BWA SW, adds default basequalities, creates read groups and performs BQSR. --- .../qscripts/RecalibrateBaseQualities.scala | 76 ++++++++++++------- .../sting/queue/util/QScriptUtils.scala | 10 +-- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index 469325f6d..75e8c8325 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -5,6 +5,8 @@ import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.util.QScriptUtils import net.sf.samtools.SAMFileHeader.SortOrder import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups} +import org.broadinstitute.sting.utils.exceptions.UserException +import org.broadinstitute.sting.commandline.Hidden /** * Created by IntelliJ IDEA. @@ -16,7 +18,7 @@ import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceRe class RecalibrateBaseQualities extends QScript { - @Input(doc="input FASTA file, BAM file - or list of FASTA/BAM files. ", shortName="i", required=true) + @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true) var input: File = _ @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true) @@ -28,51 +30,67 @@ class RecalibrateBaseQualities extends QScript { @Input(doc="dbsnp VCF file to use ", shortName="D", required=true) var dbSNP: File = _ - @Input(doc="Default base qualities. Overrides the file's original base qualities with given value. Must be used if the file does not have base qualities." , shortName = "dbq", required=false) - var dbq: Int = -1 + @Input(doc="Number of jobs to scatter/gather. Default: 0." , shortName = "sg", required=false) + var threads: Int = 0 - @Input(doc="Number of jobs to scatter/gather. Default is the number of contigs in the dataset" , shortName = "sg", required=false) - var threads: Int = -1 + @Input(doc="Sample Name to fill in the Read Group information (only necessary if using fasta/fastq)" , shortName = "sn", required=false) + var sample: String = "NA" - @Input(doc="Sample Name" , shortName = "sn", required=false) - var sample: String = "" - - @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) + @Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false) var bwaPath: File = _ + @Hidden + @Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false) + var dbq: Int = 20 + val queueLogDir: String = ".qlog/" - var nContigs: Int = 0 - var ADD_BASE_QUALITIES = false def script = { - if (dbq >= 0) - ADD_BASE_QUALITIES = true + val fileList: List[File] = QScriptUtils.createListFromFile(input) - val fileList = QScriptUtils.createListFromFile(input) - nContigs = if (threads >= 0) {threads} else {QScriptUtils.getNumberOfContigs(fileList(0))} + for (file: File <- fileList) { - for (file <- fileList) { - val qualBam: File = swapExt(file, ".bam", ".quals.bam") - val rgBam: File = if (ADD_BASE_QUALITIES) {swapExt(file, ".bam", ".rg.bam")} else {file} - val recalFile1: File = swapExt(file, ".bam", ".recal1.csv") - val recalFile2: File = swapExt(file, ".bam", ".recal2.csv") - val recalBam: File = swapExt(file, ".bam", ".recal.bam") + var USE_BWA: Boolean = false + + println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName) + + if (file.endsWith(".fasta") || file.endsWith(".fq")) { + if (bwaPath == null) { + throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA"); + } + USE_BWA = true + } + + // FASTA -> BAM steps + val alignedSam: File = file.getName + ".aligned.sam" + val sortedBam: File = swapExt(alignedSam, ".sam", ".bam") + val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam") + val rgBam: File = swapExt(file, ".bam", ".rg.bam") + + val bamBase = if (USE_BWA) {rgBam} else {file} + + // BAM Steps + val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv") + val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv") + val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam") val path1: String = recalBam + ".before" val path2: String = recalBam + ".after" - if (ADD_BASE_QUALITIES) { - add(addQuals(file, qualBam, dbq), + if (USE_BWA) { + add(align(file, alignedSam), + sortSam(alignedSam, sortedBam), + addQuals(sortedBam, qualBam, dbq), addReadGroup(qualBam, rgBam, sample)) } - add(cov(rgBam, recalFile1), - recal(rgBam, recalFile1, recalBam), + add(cov(bamBase, recalFile1), + recal(bamBase, recalFile1, recalBam), cov(recalBam, recalFile2), analyzeCovariates(recalFile1, path1), analyzeCovariates(recalFile2, path2)) @@ -86,7 +104,7 @@ class RecalibrateBaseQualities extends QScript { this.isIntermediate = true } - trait CommandLineGATKArgs extends CommandLineGATK { + trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs { this.reference_sequence = reference } @@ -112,7 +130,7 @@ class RecalibrateBaseQualities extends QScript { this.DBQ = qual } - case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups { + case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs { @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inBam) this.output = outBam @@ -133,7 +151,7 @@ class RecalibrateBaseQualities extends QScript { this.recal_file = outRecalFile this.analysisName = queueLogDir + outRecalFile + ".covariates" this.jobName = queueLogDir + outRecalFile + ".covariates" - this.scatterCount = nContigs + this.scatterCount = threads } case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { @@ -143,7 +161,7 @@ class RecalibrateBaseQualities extends QScript { this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration" - this.scatterCount = nContigs + this.scatterCount = threads } case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 99aaa9474..12bd880d8 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -22,15 +22,15 @@ object QScriptUtils { * to have empty lines and comment lines (lines starting with #). */ def createListFromFile(in: File):List[File] = { - // If the file provided ends with .bam, it is not a bam list, we treat it as a single file. + // If the file provided ends with .bam, .fasta or .fq, it is not a bam list, we treat it as a single file. // and return a list with only this file. - if (in.toString.endsWith(".bam")) + if (in.toString.endsWith(".bam") || in.toString.endsWith(".fasta") || in.toString.endsWith(".fq")) return List(in) var list: List[File] = List() - for (bam <- fromFile(in).getLines) - if (!bam.startsWith("#") && !bam.isEmpty ) - list :+= new File(bam.trim()) + for (file <- fromFile(in).getLines) + if (!file.startsWith("#") && !file.isEmpty ) + list :+= new File(file.trim()) list.sortWith(_.compareTo(_) < 0) } From 481630da00d2060290defb6e7d0ca6fb9a93cb40 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 9 Aug 2011 11:10:29 -0400 Subject: [PATCH 166/186] BWA parameters added --- .../sting/queue/qscripts/RecalibrateBaseQualities.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index 75e8c8325..9f3dd9a2c 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -110,7 +110,7 @@ class RecalibrateBaseQualities extends QScript { case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs { - def commandLine = bwaPath + " bwasw " + reference + " " + inFastq + " > " + outSam + def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z10 -t8 " + reference + " " + inFastq + " > " + outSam this.analysisName = queueLogDir + outSam + ".bwa_sam_se" this.jobName = queueLogDir + outSam + ".bwa_sam_se" } From 86afe878a7ccfff5878432e8795130ba40068f31 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 9 Aug 2011 20:55:15 -0400 Subject: [PATCH 170/186] ReducedRead optimization: single pass likelihood calculation -- Low level add() now takes a nObs argument and rather than += likelihood now does += nObs * likelihood --- .../DiploidSNPGenotypeLikelihoods.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 2014801e4..5f6865d04 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -276,13 +276,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { if ( elt.isReducedRead() ) { // reduced read representation byte qual = elt.getReducedQual(); - for ( int i = 0; i < elt.getReducedCount(); i++ ) { - add(obsBase, qual, (byte)0, (byte)0); - } - return elt.getQual(); + add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods + return elt.getReducedCount(); // we added nObs bases here } else { byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); - return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0) : 0; + return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0; } } @@ -309,9 +307,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { * @param qual1 * @param obsBase2 * @param qual2 can be 0, indicating no second base was observed for this fragment + * @param nObs The number of times this quad of values was seen. Generally 1, but reduced reads + * can have nObs > 1 for synthetic reads * @return */ - private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) { + private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2, int nObs) { // TODO-- Right now we assume that there are at most 2 reads per fragment. This assumption is fine // TODO-- given the current state of next-gen sequencing, but may need to be fixed in the future. // TODO-- However, when that happens, we'll need to be a lot smarter about the caching we do here. @@ -332,19 +332,17 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { for ( DiploidGenotype g : DiploidGenotype.values() ) { double likelihood = likelihoods[g.ordinal()]; - - //if ( VERBOSE ) { - // System.out.printf(" L(%c | G=%s, Q=%d, S=%s) = %f / %f%n", - // observedBase, g, qualityScore, pow(10,likelihood) * 100, likelihood); - //} - - log10Likelihoods[g.ordinal()] += likelihood; - log10Posteriors[g.ordinal()] += likelihood; + log10Likelihoods[g.ordinal()] += likelihood * nObs; + log10Posteriors[g.ordinal()] += likelihood * nObs; } return 1; } + private int add(byte obsBase1, byte qual1, byte obsBase2, byte qual2) { + return add(obsBase1, qual1, obsBase2, qual2, 1); + } + // ------------------------------------------------------------------------------------- // // Dealing with the cache routines From 08631546c8f3593e06dcc32e5c2e1f5fbb60f730 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 23:19:40 -0400 Subject: [PATCH 171/186] Partial commit for David so he can see what I want to do with the VariantAnnotator. Added a DbsnpArgumentCollection that people can use in their walkers to ensure that we have a standard syntax whenever allowing dbsnp rods. Added it to UG, but didn't hook it up. Maybe we should do the same for the 'variant' rod? --- .../arguments/DbsnpArgumentCollection.java | 48 +++++++++++++++++++ .../walkers/annotator/VariantAnnotator.java | 19 ++++++++ .../walkers/genotyper/UnifiedGenotyper.java | 8 ++++ 3 files changed, 75 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java new file mode 100644 index 000000000..b77b175bc --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/DbsnpArgumentCollection.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.arguments; + + +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.simpleframework.xml.*; + +/** + * @author ebanks + * @version 1.0 + */ +@Root +public class DbsnpArgumentCollection { + + /** + * A dbSNP VCF file. + */ + @Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false) + public RodBinding dbsnp = RodBinding.makeUnbound(VariantContext.class); + +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index ec7d51043..594216d53 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -34,6 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -60,6 +62,23 @@ public class VariantAnnotator extends RodWalker { @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) public RodBinding snpEffFile; + /** + * A dbSNP VCF file from which to annotate. + * + * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. + */ + @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + + /** + * A comparisons VCF file from which to annotate. + * + * If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated + * as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). Records that are filtered in the comp track will be ignored. + * Note that 'dbSNP' has been special-cased (see the --dbsnp argument). + */ + @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false) + public RodBinding comps = RodBinding.makeUnbound(VariantContext.class); + @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 812511322..6836b14fc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.DownsampleType; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -59,6 +60,13 @@ public class UnifiedGenotyper extends LocusWalker Date: Tue, 9 Aug 2011 23:21:17 -0400 Subject: [PATCH 172/186] Bringing Indel Realigner up to speed with the new rod binding syntax; now use -known to specify the known indels track. --- .../sting/gatk/walkers/indels/IndelRealigner.java | 12 ++++++------ .../indels/IndelRealignerIntegrationTest.java | 6 +++--- .../indels/IndelRealignerPerformanceTest.java | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 61f21c488..cb7627888 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -30,10 +30,7 @@ import net.sf.samtools.*; import net.sf.samtools.util.RuntimeIOException; import net.sf.samtools.util.SequenceUtil; import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -86,6 +83,9 @@ public class IndelRealigner extends ReadWalker { USE_SW } + @Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false) + public RodBinding known = RodBinding.makeUnbound(VariantContext.class); + @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) protected String intervalsFile = null; @@ -558,8 +558,8 @@ public class IndelRealigner extends ReadWalker { if ( indelRodsSeen.contains(rod) ) continue; indelRodsSeen.add(rod); - if ( VariantContextAdaptors.canBeConvertedToVariantContext(rod)) - knownIndelsToTry.add(VariantContextAdaptors.toVariantContext("", rod, ref)); + if ( rod instanceof VariantContext ) + knownIndelsToTry.add((VariantContext)rod); } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 19dc99682..28e5a15de 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -28,7 +28,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { executeTest("test realigner defaults", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "-B:indels,vcf " + knownIndels, + baseCommand + "-known " + knownIndels, 1, Arrays.asList(base_md5_with_SW_or_VCF)); executeTest("test realigner defaults with VCF", spec2); @@ -37,7 +37,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { @Test public void testKnownsOnly() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels, + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels, 1, Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888")); executeTest("realigner known indels only from VCF", spec1); @@ -46,7 +46,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { @Test public void testUseSW() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand + "--consensusDeterminationModel USE_SW -B:indels,vcf " + knownIndels, + baseCommand + "--consensusDeterminationModel USE_SW -known " + knownIndels, 1, Arrays.asList(base_md5_with_SW_or_VCF)); executeTest("realigner use SW from VCF", spec1); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java index 200b9b5a7..77675b0f4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerPerformanceTest.java @@ -30,7 +30,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -dbsnp:vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-5,650,000" + @@ -45,7 +45,7 @@ public class IndelRealignerPerformanceTest extends WalkerTest { " -LOD 5" + " -maxConsensuses 100" + " -greedy 100" + - " -dbsnp:vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " -known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -o /dev/null" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L chr1:1-150,000,000" + From 1ea5ec276bea63c485ff2b8cd4cf31265682ba25 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 23:28:59 -0400 Subject: [PATCH 173/186] Minor cleanup --- .../gatk/walkers/indels/IndelRealigner.java | 16 ---------------- .../indels/IndelRealignerIntegrationTest.java | 1 - 2 files changed, 17 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index cb7627888..dcd4f5dec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -36,7 +36,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -159,21 +158,6 @@ public class IndelRealigner extends ReadWalker { protected boolean CHECKEARLY = false; - // DEPRECATED - - @Deprecated - @Argument(fullName="sortInCoordinateOrderEvenThoughItIsHighlyUnsafe", doc="This argument is no longer used.", required=false) - protected boolean DEPRECATED_SORT_IN_COORDINATE_ORDER = false; - - @Deprecated - @Argument(fullName="realignReadsWithBadMates", doc="This argument is no longer used.", required=false) - protected boolean DEPRECATED_REALIGN_MATES = false; - - @Deprecated - @Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", doc="This argument is no longer used. See --consensusDeterminationModel instead.", required=false) - protected boolean DEPRECATED_KNOWNS_ONLY = false; - - // DEBUGGING OPTIONS FOLLOW @Hidden diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 28e5a15de..0ff6fc244 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -1,7 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.indels; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import java.util.Arrays; From 9e53fd688034da583dd4fad78b2cc6946ae91a3f Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 10 Aug 2011 07:28:35 -0400 Subject: [PATCH 174/186] Fixed VCFGatherFunction to not provide incorrect rod_priority_list -- simply don't provide one, since you are just 'cating' the files together and genotypes never overlap --- .../sting/queue/extensions/gatk/VcfGatherFunction.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala index f8a6cb2cb..d70022147 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala @@ -44,7 +44,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction { this.intervalsString = this.originalGATK.intervalsString this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) } - this.rod_priority_list = (0 until this.gatherParts.size).map("input"+_).mkString(",") this.out = this.originalOutput this.assumeIdenticalSamples = true From c60cf52f73d436bd7159957159d1e9356cb05fd1 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 10 Aug 2011 10:20:37 -0400 Subject: [PATCH 176/186] Updating VQSR for new RodBinding syntax. Cleaning up indel specific parts of VQSR. --- .../ApplyRecalibration.java | 22 ++-- .../VariantDataManager.java | 93 ++++++++-------- .../VariantRecalibrator.java | 105 ++++++++++++------ ...ntRecalibrationWalkersIntegrationTest.java | 12 +- 4 files changed, 129 insertions(+), 103 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 74b7b8e7d..abe27e483 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; @@ -56,6 +56,11 @@ public class ApplyRecalibration extends RodWalker { ///////////////////////////// // Inputs ///////////////////////////// + /** + * The raw input variants to be recalibrated. + */ + @Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true) + public List> input; @Input(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true) private File RECAL_FILE; @Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true) @@ -101,17 +106,8 @@ public class ApplyRecalibration extends RodWalker { } Collections.reverse(tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) to worst (highest truth sensitivity) - for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { - if( d.getName().startsWith("input") ) { - inputNames.add(d.getName()); - logger.info("Found input variant track with name " + d.getName()); - } else { - logger.info("Not evaluating ROD binding " + d.getName()); - } - } - - if( inputNames.size() == 0 ) { - throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." ); + for( final RodBinding rod : input ) { + inputNames.add( rod.getName() ); } if( IGNORE_INPUT_FILTERS != null ) { @@ -168,7 +164,7 @@ public class ApplyRecalibration extends RodWalker { return 1; } - for( VariantContext vc : tracker.getValues(VariantContext.class, inputNames, context.getLocation()) ) { + for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) { if( vc != null ) { if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { String filterString = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 8687b5796..08026a45e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -26,10 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; /** @@ -105,31 +106,6 @@ public class VariantDataManager { } } - public void addTrainingSet( final TrainingSet trainingSet ) { - trainingSets.add( trainingSet ); - } - - public boolean checkHasTrainingSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isTraining ) { return true; } - } - return false; - } - - public boolean checkHasTruthSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isTruth ) { return true; } - } - return false; - } - - public boolean checkHasKnownSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isKnown ) { return true; } - } - return false; - } - public ExpandingArrayList getTrainingData() { final ExpandingArrayList trainingData = new ExpandingArrayList(); for( final VariantDatum datum : data ) { @@ -240,13 +216,14 @@ public class VariantDataManager { if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } - if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) { - // normalize QD by event length for indel case - int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now - if (eventLength > 0) // sanity check - value /= (double)eventLength; - } + if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) { + // normalize QD by event length for indel case + int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now + if (eventLength > 0) { // sanity check + value /= (double)eventLength; + } + } if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } @@ -257,30 +234,44 @@ public class VariantDataManager { return value; } - public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) { + public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final HashMap rodToPriorMap, + final List> training, final List> truth, final List> known, final List> badSites) { datum.isKnown = false; datum.atTruthSite = false; datum.atTrainingSite = false; datum.atAntiTrainingSite = false; datum.prior = 2.0; - datum.consensusCount = 0; - for( final TrainingSet trainingSet : trainingSets ) { - for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name, ref.getLocus()) ) { - if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && - ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && - (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) { - - datum.isKnown = datum.isKnown || trainingSet.isKnown; - datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth; - datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining; - datum.prior = Math.max( datum.prior, trainingSet.prior ); - datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 ); + for( final RodBinding rod : training ) { + for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) { + if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) { + datum.atTrainingSite = true; + datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) ); } + } + } + for( final RodBinding rod : truth ) { + for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) { + if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) { + datum.atTruthSite = true; + datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) ); + } + } + } + for( final RodBinding rod : known ) { + for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) { + if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) { + datum.isKnown = true; + datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) ); + } + } + } + for( final RodBinding rod : badSites ) { + for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) { if( trainVC != null ) { - datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining; + datum.atAntiTrainingSite = true; + datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) ); } - } } } @@ -292,4 +283,10 @@ public class VariantDataManager { (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"))); } } + + private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) { + return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && + ((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) && + (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 7bd7ea46d..da9da936b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -25,13 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; @@ -57,11 +53,51 @@ import java.util.*; public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { - public static final String VQS_LOD_KEY = "VQSLOD"; - public static final String CULPRIT_KEY = "culprit"; + public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model + public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out @ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection(); + ///////////////////////////// + // Inputs + ///////////////////////////// + /** + * The raw input variants to be recalibrated. + */ + @Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true) + public List> input; + /** + * A list of training variants used to train the Gaussian mixture model. + * + * Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model. + */ + @Input(fullName="training", shortName = "training", doc="A list of training variants used to train the Gaussian mixture model", required=true) + public List> training; + /** + * A list of true variants to be used when deciding the truth sensitivity cut of the final callset. + * + * When deciding where to set the cutoff in VQSLOD sensitivity to these truth sites is used. + * Typically one might want to say I dropped my threshold until I got back 99% of HapMap sites, for example. + */ + @Input(fullName="truth", shortName = "truth", doc="A list of true variants to be used when deciding the truth sensitivity cut of the final callset", required=true) + public List> truth; + /** + * A list of known variants to be used for metric comparison purposes. + * + * The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes. + * The output metrics are stratified by known status in order to aid in comparisons with other call sets. + */ + @Input(fullName="known", shortName = "known", doc="A list of known variants to be used for metric comparison purposes", required=false) + public List> known = Collections.emptyList(); + /** + * A list of known bad variants used to supplement training the negative model. + * + * In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list + * with a database of known bad variants. Maybe these are loci which are frequently filtered out in many projects (centromere, for example). + */ + @Input(fullName="badSites", shortName = "badSites", doc="A list of known bad variants used to supplement training the negative model", required=false) + public List> badSites = Collections.emptyList(); + ///////////////////////////// // Outputs ///////////////////////////// @@ -96,9 +132,9 @@ public class VariantRecalibrator extends RodWalker ignoreInputFilterSet = new TreeSet(); - private final Set inputNames = new HashSet(); private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC ); + private final HashMap rodToPriorMap = new HashMap(); //--------------------------------------------------------------------------------------------------------------- // @@ -123,31 +159,24 @@ public class VariantRecalibrator extends RodWalker> allInputBindings = new ArrayList>(); + allInputBindings.addAll(truth); + allInputBindings.addAll(training); + allInputBindings.addAll(known); + allInputBindings.addAll(badSites); + for( final RodBinding rod : allInputBindings ) { + try { + rodToPriorMap.put(rod.getName(), (rod.getTags().containsKey("prior") ? Double.parseDouble(rod.getTags().getValue("prior")) : 0.0) ); + } catch( NumberFormatException e ) { + throw new UserException.BadInput("Bad rod binding syntax. Prior key-value tag detected but isn't parsable. Expecting something like -training:prior=12.0 my.set.vcf"); + } + } } //--------------------------------------------------------------------------------------------------------------- @@ -163,10 +192,12 @@ public class VariantRecalibrator extends RodWalker Date: Wed, 10 Aug 2011 10:23:33 -0400 Subject: [PATCH 177/186] TrainingSets class is obsolete now. --- .../variantrecalibration/TrainingSet.java | 71 ------------------- .../VariantDataManager.java | 2 - 2 files changed, 73 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java deleted file mode 100755 index 6c1a7ddbc..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2011 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.variantrecalibration; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.commandline.Tags; - -/** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: 3/12/11 - */ - -public class TrainingSet { - - public String name; - public boolean isKnown = false; - public boolean isTraining = false; - public boolean isAntiTraining = false; - public boolean isTruth = false; - public boolean isConsensus = false; - public double prior = 0.0; - - protected final static Logger logger = Logger.getLogger(TrainingSet.class); - - public TrainingSet( final String name, final Tags tags ) { - this.name = name; - - // Parse the tags to decide which tracks have which properties - if( tags != null ) { - isKnown = tags.containsKey("known") && tags.getValue("known").equals("true"); - isTraining = tags.containsKey("training") && tags.getValue("training").equals("true"); - isAntiTraining = tags.containsKey("bad") && tags.getValue("bad").equals("true"); - isTruth = tags.containsKey("truth") && tags.getValue("truth").equals("true"); - isConsensus = tags.containsKey("consensus") && tags.getValue("consensus").equals("true"); - prior = ( tags.containsKey("prior") ? Double.parseDouble(tags.getValue("prior")) : prior ); - } - - // Report back to the user which tracks were found and the properties that were detected - if( !isConsensus && !isAntiTraining ) { - logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", this.name, isKnown, isTraining, isTruth, prior) ); - } else if( isConsensus ) { - logger.info( String.format( "Found consensus track: %s", this.name) ); - } else { - logger.info( String.format( "Found bad sites training track: %s", this.name) ); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 08026a45e..cb4d94332 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -52,7 +52,6 @@ public class VariantDataManager { private final double[] meanVector; private final double[] varianceVector; // this is really the standard deviation public final ArrayList annotationKeys; - private final ExpandingArrayList trainingSets; private final VariantRecalibratorArgumentCollection VRAC; protected final static Logger logger = Logger.getLogger(VariantDataManager.class); @@ -63,7 +62,6 @@ public class VariantDataManager { this.VRAC = VRAC; meanVector = new double[this.annotationKeys.size()]; varianceVector = new double[this.annotationKeys.size()]; - trainingSets = new ExpandingArrayList(); } public void setData( final ExpandingArrayList data ) { From 06cdc4d5f948e813b22aacd61b066c08dfe36a9a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 10 Aug 2011 12:00:56 -0400 Subject: [PATCH 178/186] Added a StandardVariantContextInputArgumentCollection that is now used for consistency by many of the core tools. --- ...VariantContextInputArgumentCollection.java | 55 +++++++++++++++++++ .../walkers/annotator/VariantAnnotator.java | 11 ++-- .../beagle/BeagleOutputToVCFWalker.java | 15 +++-- .../beagle/ProduceBeagleInputWalker.java | 9 +-- .../filters/VariantFiltrationWalker.java | 15 ++--- .../phasing/ReadBackedPhasingWalker.java | 13 +++-- .../walkers/variantutils/CombineVariants.java | 7 +-- .../variantutils/LeftAlignVariants.java | 18 +++--- .../variantutils/LiftoverVariants.java | 22 ++++---- .../walkers/variantutils/SelectVariants.java | 8 +-- .../variantutils/ValidateVariants.java | 15 ++--- .../walkers/variantutils/VariantsToTable.java | 12 ++-- 12 files changed, 124 insertions(+), 76 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java new file mode 100644 index 000000000..dd5f628c1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.arguments; + + +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.simpleframework.xml.Root; + +/** + * @author ebanks + * @version 1.0 + */ +@Root +public class StandardVariantContextInputArgumentCollection { + + /** + * The VCF input file(s) + * + * The variant track can take any number of arguments on the command line. Each -V argument + * will be included as an input to the tool. If no explicit name is provided, + * the -V arguments will be named using the default algorithm: variant, variant2, variant3, etc. + * The user can override this by providing an explicit name -V:name,vcf for each -V argument, + * and each named argument will be labeled as such in the output (i.e., set=name rather than + * set=variant2). The order of arguments does not matter except for the naming. + */ + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public RodBinding variants; + +} + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 594216d53..9bf70017a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -35,7 +36,6 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -56,8 +56,7 @@ import java.util.*; @By(DataSource.REFERENCE) public class VariantAnnotator extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) public RodBinding snpEffFile; @@ -142,7 +141,7 @@ public class VariantAnnotator extends RodWalker { listAnnotationsAndExit(); // get the list of all sample names from the variant VCF input rod, if applicable - List rodName = Arrays.asList(variants.getName()); + List rodName = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName); // add the non-VCF sample from the command-line, if applicable @@ -166,7 +165,7 @@ public class VariantAnnotator extends RodWalker { // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet(); hInfo.addAll(engine.getVCFAnnotationDescriptions()); - for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variants.getName())) ) { + for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList(variantCollection.variants.getName())) ) { if ( isUniqueHeaderLine(line, hInfo) ) hInfo.add(line); } @@ -225,7 +224,7 @@ public class VariantAnnotator extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(variants, context.getLocation()); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); if ( VCs.size() == 0 ) return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index a34dfd080..221861062 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -25,10 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.beagle; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -54,8 +52,9 @@ import static java.lang.Math.log10; */ @Requires(value={}) public class BeagleOutputToVCFWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) public RodBinding comp = RodBinding.makeUnbound(VariantContext.class); @@ -111,7 +110,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { hInfo.add(new VCFInfoHeaderLine("AFH", 1, VCFHeaderLineType.Float, "Allele Number from Comparison ROD at this site")); } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName())); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); @@ -123,7 +122,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { return 0; GenomeLoc loc = context.getLocation(); - VariantContext vc_input = tracker.getFirstValue(variants, loc); + VariantContext vc_input = tracker.getFirstValue(variantCollection.variants, loc); VariantContext vc_comp = tracker.getFirstValue(comp, loc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 88b976e7e..4837aa54d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.beagle; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -52,8 +53,8 @@ import java.util.*; */ @Requires(value={}) public class ProduceBeagleInputWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) public RodBinding validation = RodBinding.makeUnbound(VariantContext.class); @@ -98,7 +99,7 @@ public class ProduceBeagleInputWalker extends RodWalker { public void initialize() { - samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); + samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variantCollection.variants.getName())); beagleWriter.print("marker alleleA alleleB"); for ( String sample : samples ) @@ -120,7 +121,7 @@ public class ProduceBeagleInputWalker extends RodWalker { public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { if( tracker != null ) { GenomeLoc loc = context.getLocation(); - VariantContext variant_eval = tracker.getFirstValue(variants, loc); + VariantContext variant_eval = tracker.getFirstValue(variantCollection.variants, loc); VariantContext validation_eval = tracker.getFirstValue(validation, loc); if ( goodSite(variant_eval,validation_eval) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index e0bd58aea..e5968692b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -26,13 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; @@ -53,8 +50,8 @@ import java.util.*; @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltrationWalker extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Input(fullName="mask", doc="Input ROD mask", required=false) public RodBinding mask = RodBinding.makeUnbound(Feature.class); @@ -100,7 +97,7 @@ public class VariantFiltrationWalker extends RodWalker { private void initializeVcfWriter() { - final List inputNames = Arrays.asList(variants.getName()); + final List inputNames = Arrays.asList(variantCollection.variants.getName()); // setup the header fields Set hInfo = new HashSet(); @@ -152,7 +149,7 @@ public class VariantFiltrationWalker extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(variants, context.getLocation()); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); // is there a SNP mask present? boolean hasMask = tracker.hasValues(mask); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index f81dec2ac..ac4fba4b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -24,12 +24,12 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.DisjointSet; @@ -67,8 +67,8 @@ public class ReadBackedPhasingWalker extends RodWalker variants; + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(doc = "File to which variants should be written", required = true) protected VCFWriter writer = null; @@ -175,8 +175,9 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); - Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(variants.getName()).getGenotypeSamples() : samplesToPhase); + String trackName = variantCollection.variants.getName(); + Map rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); + Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); writer.writeHeader(new VCFHeader(hInfo, samples)); } @@ -207,7 +208,7 @@ public class ReadBackedPhasingWalker extends RodWalker unprocessedList = new LinkedList(); - for (VariantContext vc : tracker.getValues(variants, context.getLocation())) { + for (VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) { if (samplesToPhase != null) vc = reduceVCToSamples(vc, samplesToPhase); if (ReadBackedPhasingWalker.processVariantInPhasing(vc)) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index f24d7a211..fb172e1b7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.SampleUtils; @@ -64,8 +63,8 @@ public class CombineVariants extends RodWalker { * are techincally order dependent. It is strongly recommended to provide explicit names when * a rod priority list is provided. */ - @Input(fullName = "variant", shortName = "V", doc="The VCF files to merge together", required=true) - public List> variantsToMerge; + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public List> variants; @Output(doc="File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; @@ -157,7 +156,7 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus // Need to provide reference bases to simpleMerge starting at current locus - Collection vcs = tracker.getValues(variantsToMerge, context.getLocation()); + Collection vcs = tracker.getValues(variants, context.getLocation()); if ( sitesOnlyVCF ) { vcs = VariantContextUtils.sitesOnlyVariantContexts(vcs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index 52b633cdf..f88810aed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -50,8 +50,9 @@ import java.util.*; @Reference(window=@Window(start=-200,stop=200)) @Requires(value={}) public class LeftAlignVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(doc="File to which variants should be written",required=true) protected VCFWriter baseWriter = null; @@ -59,10 +60,11 @@ public class LeftAlignVariants extends RodWalker { private SortingVCFWriter writer; public void initialize() { - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); + String trackName = variantCollection.variants.getName(); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - Set headerLines = vcfHeaders.get(variants.getName()).getMetaData(); + Set headerLines = vcfHeaders.get(trackName).getMetaData(); baseWriter.writeHeader(new VCFHeader(headerLines, samples)); writer = new SortingVCFWriter(baseWriter, 200); @@ -72,7 +74,7 @@ public class LeftAlignVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(variants, context.getLocation()); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); int changedSites = 0; for ( VariantContext vc : VCs ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index 7edb4d52c..f93fb84e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -29,10 +29,8 @@ import net.sf.picard.liftover.LiftOver; import net.sf.picard.util.Interval; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -52,8 +50,9 @@ import java.util.*; */ @Requires(value={}) public class LiftoverVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(doc="File to which variants should be written",required=true) protected File file = null; @@ -88,12 +87,13 @@ public class LiftoverVariants extends RodWalker { throw new UserException.BadInput("the chain file you are using is not compatible with the reference you are trying to lift over to; please use the appropriate chain file for the given reference"); } - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); + String trackName = variantCollection.variants.getName(); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); Set metaData = new HashSet(); - if ( vcfHeaders.containsKey(variants.getName()) ) - metaData.addAll(vcfHeaders.get(variants.getName()).getMetaData()); + if ( vcfHeaders.containsKey(trackName) ) + metaData.addAll(vcfHeaders.get(trackName).getMetaData()); if ( RECORD_ORIGINAL_LOCATION ) { metaData.add(new VCFInfoHeaderLine("OriginalChr", 1, VCFHeaderLineType.String, "Original contig name for the record")); metaData.add(new VCFInfoHeaderLine("OriginalStart", 1, VCFHeaderLineType.Integer, "Original start position for the record")); @@ -146,7 +146,7 @@ public class LiftoverVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(variants, context.getLocation()); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); for ( VariantContext vc : VCs ) convertAndWrite(vc, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 7179956c1..e254fe61f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -61,8 +62,7 @@ public class SelectVariants extends RodWalker { * Variants from this file are sent through the filtering and modifying routines as directed * by the arguments to SelectVariants, and finally are emitted. */ - @Input(fullName="variant", shortName = "V", doc="Select variants from this VCF file", required=true) - public RodBinding variants; + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** * If provided, we will filter out variants that are "discordant" to the variants in this file @@ -194,7 +194,7 @@ public class SelectVariants extends RodWalker { */ public void initialize() { // Get list of samples to include in the output - List rodNames = Arrays.asList(variants.getName()); + List rodNames = Arrays.asList(variantCollection.variants.getName()); Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); @@ -318,7 +318,7 @@ public class SelectVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getValues(variants, context.getLocation()); + Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); if ( vcs == null || vcs.size() == 0) { return 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 9f64e0ca4..cc7854f12 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -28,13 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; @@ -56,8 +53,8 @@ import java.util.Set; @Requires(value={}) public class ValidateVariants extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); public enum ValidationType { ALL, REF, IDS, ALLELES, CHR_COUNTS @@ -78,14 +75,14 @@ public class ValidateVariants extends RodWalker { private File file = null; public void initialize() { - file = new File(variants.getSource()); + file = new File(variantCollection.variants.getSource()); } public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(variants, context.getLocation()); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); for ( VariantContext vc : VCs ) validate(vc, tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index f999f25c0..fa25b0191 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -24,12 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -48,8 +46,8 @@ import java.util.*; @Requires(value={}) public class VariantsToTable extends RodWalker { - @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(doc="File to which results should be written",required=true) protected PrintStream out; @@ -138,7 +136,7 @@ public class VariantsToTable extends RodWalker { return 0; if ( ++nRecords < MAX_RECORDS || MAX_RECORDS == -1 ) { - for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) { + for ( VariantContext vc : tracker.getValues(variantCollection.variants, context.getLocation())) { if ( (keepMultiAllelic || vc.isBiallelic()) && ( showFiltered || vc.isNotFiltered() ) ) { List vals = extractFields(vc, fieldsToTake, ALLOW_MISSING_DATA); out.println(Utils.join("\t", vals)); From c884b6bf1f01d9d6c2a59898ece5113830c567ac Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 10 Aug 2011 12:07:43 -0400 Subject: [PATCH 179/186] Fixed comment --- .../StandardVariantContextInputArgumentCollection.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java index dd5f628c1..847120414 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardVariantContextInputArgumentCollection.java @@ -39,14 +39,9 @@ import org.simpleframework.xml.Root; public class StandardVariantContextInputArgumentCollection { /** - * The VCF input file(s) + * The VCF file we are using. * - * The variant track can take any number of arguments on the command line. Each -V argument - * will be included as an input to the tool. If no explicit name is provided, - * the -V arguments will be named using the default algorithm: variant, variant2, variant3, etc. - * The user can override this by providing an explicit name -V:name,vcf for each -V argument, - * and each named argument will be labeled as such in the output (i.e., set=name rather than - * set=variant2). The order of arguments does not matter except for the naming. + * Variants from this file are used by this tool as input. */ @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; From a42f90db1104d918019a1d3cead1bd6fbdd730d2 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 10 Aug 2011 12:20:18 -0400 Subject: [PATCH 180/186] Moving more tools over to use the standard VC arg collection. Also, while I'm in there, I removed all of the empty references to @Requires given that it's no longer relevant. --- .../walkers/annotator/VariantAnnotator.java | 1 - .../beagle/BeagleOutputToVCFWalker.java | 2 -- .../beagle/ProduceBeagleInputWalker.java | 2 -- .../VariantsToBeagleUnphasedWalker.java | 2 -- .../walkers/diffengine/DiffObjectsWalker.java | 2 -- .../filters/VariantFiltrationWalker.java | 1 - .../walkers/genotyper/UGCalcLikelihoods.java | 1 - .../variantutils/FilterLiftedVariants.java | 19 ++++++++++--------- .../variantutils/LeftAlignVariants.java | 1 - .../variantutils/LiftoverVariants.java | 2 -- .../variantutils/RandomlySplitVariants.java | 17 +++++++---------- .../walkers/variantutils/SelectVariants.java | 2 -- .../variantutils/ValidateVariants.java | 1 - .../VariantValidationAssessor.java | 2 -- .../walkers/variantutils/VariantsToTable.java | 2 -- .../walkers/variantutils/VariantsToVCF.java | 1 - .../sting/gatk/WalkerManagerUnitTest.java | 2 -- 17 files changed, 17 insertions(+), 43 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 9bf70017a..efa40d9a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -50,7 +50,6 @@ import java.util.*; /** * Annotates variant calls with context information. Users can specify which of the available annotations to use. */ -@Requires(value={}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 221861062..ee2e4853b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; @@ -50,7 +49,6 @@ import static java.lang.Math.log10; /** * Takes files produced by Beagle imputation engine and creates a vcf with modified annotations. */ -@Requires(value={}) public class BeagleOutputToVCFWalker extends RodWalker { @ArgumentCollection diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index 4837aa54d..a46ec8b48 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve; import org.broadinstitute.sting.utils.GenomeLoc; @@ -51,7 +50,6 @@ import java.util.*; /** * Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file */ -@Requires(value={}) public class ProduceBeagleInputWalker extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java index d26bfeca4..22c39d794 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphasedWalker.java @@ -32,7 +32,6 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; @@ -55,7 +54,6 @@ import java.util.Set; * in input variant file. Will additional hold back a fraction of the sites for evaluation, marking the * genotypes at that sites as missing, and writing the truth of these sites to a second VCF file */ -@Requires(value={}) public class VariantsToBeagleUnphasedWalker extends RodWalker { @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java index f43d1342d..5889d19e5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsWalker.java @@ -29,7 +29,6 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import java.io.File; @@ -131,7 +130,6 @@ import java.util.List; * @author Mark DePristo * @since 7/4/11 */ -@Requires(value={}) public class DiffObjectsWalker extends RodWalker { /** * Writes out a file of the DiffEngine format: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index e5968692b..8ee1e3a89 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -46,7 +46,6 @@ import java.util.*; /** * Filters variant calls using a number of user-selectable, parameterizable criteria. */ -@Requires(value={}) @Reference(window=@Window(start=-50,stop=50)) public class VariantFiltrationWalker extends RodWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java index 4f78fab36..503d87cbe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCalcLikelihoods.java @@ -49,7 +49,6 @@ import java.util.TreeSet; * the name 'allele' so we know which alternate allele to use at each site. */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@Requires(value={}) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.READS) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 751633f9d..4c2222f3a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -24,9 +24,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -46,10 +46,10 @@ import java.util.Set; * Filters a lifted-over VCF file for ref bases that have been changed. */ @Reference(window=@Window(start=0,stop=100)) -@Requires(value={}) public class FilterLiftedVariants extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); private static final int MAX_VARIANT_SIZE = 100; @@ -59,10 +59,11 @@ public class FilterLiftedVariants extends RodWalker { private long failedLocs = 0, totalLocs = 0; public void initialize() { - Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(variants.getName())); - Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(variants.getName())); + String trackName = variantCollection.variants.getName(); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(variants.getName()) ? vcfHeaders.get(variants.getName()).getMetaData() : null, samples); + final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey(trackName) ? vcfHeaders.get(trackName).getMetaData() : null, samples); writer.writeHeader(vcfHeader); } @@ -89,7 +90,7 @@ public class FilterLiftedVariants extends RodWalker { if ( tracker == null ) return 0; - Collection VCs = tracker.getValues(variants, context.getLocation()); + Collection VCs = tracker.getValues(variantCollection.variants, context.getLocation()); for ( VariantContext vc : VCs ) filterAndWrite(ref.getBases(), vc); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java index f88810aed..c47a015c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java @@ -48,7 +48,6 @@ import java.util.*; * Left-aligns indels from a variants file. */ @Reference(window=@Window(start=-200,stop=200)) -@Requires(value={}) public class LeftAlignVariants extends RodWalker { @ArgumentCollection diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index f93fb84e3..1c76a21ea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -34,7 +34,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -48,7 +47,6 @@ import java.util.*; /** * Lifts a VCF file over from one build to another. Note that the resulting VCF could be mis-sorted. */ -@Requires(value={}) public class LiftoverVariants extends RodWalker { @ArgumentCollection diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java index 5f5c9547b..1fefd20fc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java @@ -24,15 +24,12 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -45,10 +42,10 @@ import java.util.*; /** * Takes a VCF file, randomly splits variants into two different sets, and outputs 2 new VCFs with the results. */ -@Requires(value={}) public class RandomlySplitVariants extends RodWalker { - @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) - public RodBinding variants; + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true) protected VCFWriter vcfWriter1 = null; @@ -72,7 +69,7 @@ public class RandomlySplitVariants extends RodWalker { iFraction = (int)(fraction * 1000.0); // setup the header info - final List inputNames = Arrays.asList(variants.getName()); + final List inputNames = Arrays.asList(variantCollection.variants.getName()); Set samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames); Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); @@ -94,7 +91,7 @@ public class RandomlySplitVariants extends RodWalker { if ( tracker == null ) return 0; - Collection vcs = tracker.getValues(variants, context.getLocation()); + Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); for ( VariantContext vc : vcs ) { int random = GenomeAnalysisEngine.getRandomGenerator().nextInt(1000); if ( random < iFraction ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index e254fe61f..c45ff280b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -38,7 +38,6 @@ import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -54,7 +53,6 @@ import java.util.*; * Takes a VCF file, selects variants based on sample(s) in which it was found and/or on various annotation criteria, * recompute the value of certain annotations based on the new sample set, and output a new VCF with the results. */ -@Requires(value={}) public class SelectVariants extends RodWalker { /** * The VCF file we are selecting variants from. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index cc7854f12..c3e7dbe0c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -50,7 +50,6 @@ import java.util.Set; * Validates a variants file. */ @Reference(window=@Window(start=0,stop=100)) -@Requires(value={}) public class ValidateVariants extends RodWalker { @ArgumentCollection diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 218f5377e..6ed0bbd16 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -36,7 +36,6 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @@ -47,7 +46,6 @@ import java.util.*; * Converts Sequenom files to a VCF annotated with QC metrics (HW-equilibrium, % failed probes) */ @Reference(window=@Window(start=0,stop=40)) -@Requires(value={}) public class VariantValidationAssessor extends RodWalker { @Input(fullName="variants", shortName = "V", doc="Input VCF file", required=true) public RodBinding variants; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index fa25b0191..af3593ce4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -43,7 +42,6 @@ import java.util.*; /** * Emits specific fields as dictated by the user from one or more VCF files. */ -@Requires(value={}) public class VariantsToTable extends RodWalker { @ArgumentCollection diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 497d98b99..f9e9562ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -56,7 +56,6 @@ import java.util.*; /** * Converts variants from other file formats to VCF format. */ -@Requires(value={}) @Reference(window=@Window(start=-40,stop=40)) public class VariantsToVCF extends RodWalker { diff --git a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java index cd43927a4..6149a1e51 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/WalkerManagerUnitTest.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk; import org.testng.Assert; import org.broadinstitute.sting.commandline.Hidden; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.qc.CountLociWalker; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; @@ -64,7 +63,6 @@ public class WalkerManagerUnitTest { } @Hidden -@Requires(value={}) class UninstantiableWalker extends Walker { // Private constructor will generate uninstantiable message private UninstantiableWalker() {} From 480e7a798476ac577d7e059ab592a38d5298f1d2 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 9 Aug 2011 16:30:08 -0400 Subject: [PATCH 181/186] Correctly initialize the optional SnpEff rod binding in VariantAnnotator using RodBinding.makeUnbound() --- .../sting/gatk/walkers/annotator/VariantAnnotator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index efa40d9a8..54231e9b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -58,7 +58,7 @@ public class VariantAnnotator extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) - public RodBinding snpEffFile; + public RodBinding snpEffFile = RodBinding.makeUnbound(SnpEffFeature.class); /** * A dbSNP VCF file from which to annotate. From 577f861f69779361e8b2248e8fa3f37bf26ac897 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 10 Aug 2011 11:39:41 -0400 Subject: [PATCH 182/186] Pass the rodBindings into the VariantAnnotator engine, and from there to the annotation classes themselves. --- .../gatk/walkers/annotator/AlleleBalance.java | 4 +++- .../annotator/AlleleBalanceBySample.java | 4 +++- .../gatk/walkers/annotator/BaseCounts.java | 4 +++- .../walkers/annotator/ChromosomeCounts.java | 4 +++- .../walkers/annotator/DepthOfCoverage.java | 4 +++- .../annotator/DepthPerAlleleBySample.java | 4 +++- .../gatk/walkers/annotator/FisherStrand.java | 4 +++- .../gatk/walkers/annotator/GCContent.java | 4 +++- .../sting/gatk/walkers/annotator/GLstats.java | 4 +++- .../walkers/annotator/HaplotypeScore.java | 4 +++- .../gatk/walkers/annotator/HardyWeinberg.java | 4 +++- .../walkers/annotator/HomopolymerRun.java | 4 +++- .../gatk/walkers/annotator/IndelType.java | 4 +++- .../sting/gatk/walkers/annotator/LowMQ.java | 4 +++- .../walkers/annotator/MappingQualityZero.java | 4 +++- .../annotator/MappingQualityZeroBySample.java | 6 +++-- .../annotator/MappingQualityZeroFraction.java | 4 +++- .../gatk/walkers/annotator/NBaseCount.java | 4 +++- .../gatk/walkers/annotator/QualByDepth.java | 4 +++- .../walkers/annotator/RMSMappingQuality.java | 4 +++- .../gatk/walkers/annotator/RankSumTest.java | 4 +++- .../ReadDepthAndAllelicFractionBySample.java | 4 +++- .../gatk/walkers/annotator/SBByDepth.java | 4 +++- .../gatk/walkers/annotator/SampleList.java | 4 +++- .../sting/gatk/walkers/annotator/SnpEff.java | 22 +++++++++---------- .../walkers/annotator/SpanningDeletions.java | 4 +++- .../annotator/TechnologyComposition.java | 4 +++- .../walkers/annotator/VariantAnnotator.java | 22 +++++++++++++++++-- .../annotator/VariantAnnotatorEngine.java | 13 ++++++----- .../interfaces/GenotypeAnnotation.java | 5 ++++- .../interfaces/InfoFieldAnnotation.java | 7 +++--- .../walkers/genotyper/UnifiedGenotyper.java | 5 ++++- 32 files changed, 130 insertions(+), 50 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 784927ab4..219ccbc0c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -44,7 +46,7 @@ import java.util.Map; public class AlleleBalance extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index f70a87dc5..df9890d64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -17,7 +19,7 @@ import java.util.*; public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { Double ratio = annotateSNP(stratifiedContext, vc, g); if (ratio == null) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index 7cd159c5d..76daaa06b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -31,6 +31,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -48,7 +50,7 @@ import java.util.Map; public class BaseCounts extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 9b30079d0..b2a3e6a26 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -50,7 +52,7 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! vc.hasGenotypes() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index d8907c57f..f2abbc5b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 20513421d..958075a92 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -28,7 +30,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index e71febece..0ad643a4e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import cern.jet.math.Arithmetic; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -46,7 +48,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! vc.isVariant() || vc.isFiltered() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 588d3e98a..f8e422e23 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { double content = computeGCContent(ref); Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%.2f", content)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java index 862e12f7d..8fde3a20f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -27,7 +29,7 @@ public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { private static final int MIN_SAMPLES = 10; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { final Map genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 2196de389..24571aee1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import net.sf.samtools.SAMRecord; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -54,7 +56,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static char REGEXP_WILDCARD = '.'; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 2d9424e98..292a6c5e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -24,7 +26,7 @@ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgress private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { final Map genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 870e9992b..97ac3ab6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -20,7 +22,7 @@ public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnot private boolean ANNOTATE_INDELS = true; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.isBiallelic() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index b1c16ba0d..7308f7f25 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -21,7 +23,7 @@ import java.util.*; */ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { int run; if (vc.isMixed()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 5de9aaa3b..492578839 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class LowMQ extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 60bfe945f..2c5314822 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -20,7 +22,7 @@ import java.util.Map; public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java index f2b7b72b9..f4d80f554 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -50,8 +52,8 @@ import java.util.Map; * To change this template use File | Settings | File Templates. */ public class MappingQualityZeroBySample extends GenotypeAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, - AlignmentContext context, VariantContext vc, Genotype g) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, + ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 3a6c9dce9..2b8296778 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -20,7 +22,7 @@ import java.util.Map; public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index 9f67acf65..3ce01bc2a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -22,7 +24,7 @@ import java.util.Map; */ public class NBaseCount extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 20bee9008..7b97a9c38 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index d1d9871e7..38345a1c4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -22,7 +24,7 @@ import java.util.Map; public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 643056c1d..cad10c77d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -25,7 +27,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements Standar static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java index f3e99235a..68cc86478 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -58,7 +60,7 @@ public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { if ( g == null || !g.isCalled() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index a5ebd8db2..662b5cdce 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class SBByDepth extends AnnotationByDepth { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index 3712ca8ae..0977a041f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -43,7 +45,7 @@ import java.util.Map; public class SampleList extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( vc.isMonomorphic() || !vc.hasGenotypes() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index cac59c8bb..26a9b2edd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -69,8 +70,11 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio public static final String CODON_NUM_KEY = "CODON_NUM"; public static final String CDS_SIZE_KEY = "CDS_SIZE"; - public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { - List features = tracker.getValues(Feature.class); + public static final String SNPEFF_ROD_NAME = "snpEffFile"; + + public Map annotate ( RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { + RodBinding snpEffRodBinding = (RodBinding)rodBindings.get(SNPEFF_ROD_NAME); + List features = tracker.getValues(snpEffRodBinding); // Add only annotations for one of the most biologically-significant effects as defined in // the SnpEffConstants class: @@ -83,18 +87,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio return generateAnnotations(mostSignificantEffect); } - private SnpEffFeature getMostSignificantEffect ( List features ) { + private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { SnpEffFeature mostSignificantEffect = null; - for ( Feature feature : features ) { - if ( feature instanceof SnpEffFeature ) { - SnpEffFeature snpEffFeature = (SnpEffFeature)feature; + for ( SnpEffFeature snpEffFeature : snpEffFeatures ) { + if ( mostSignificantEffect == null || + snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { - if ( mostSignificantEffect == null || - snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { - - mostSignificantEffect = snpEffFeature; - } + mostSignificantEffect = snpEffFeature; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 332b0226b..c11634c94 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +20,7 @@ import java.util.Map; public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index 626142cd2..12e7259a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -29,7 +31,7 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi private String n454 ="Num454"; private String nSolid = "NumSOLiD"; private String nOther = "NumOther"; - public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(RefMetaDataTracker tracker, Map> rodBindings, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 54231e9b4..a72c483e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; @@ -57,6 +58,12 @@ public class VariantAnnotator extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + /** + * A SnpEff output file from which to add annotations. + * + * The INFO field will be annotated with information on the most biologically-significant effect + * listed in the SnpEff output file for each variant. + */ @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) public RodBinding snpEffFile = RodBinding.makeUnbound(SnpEffFeature.class); @@ -112,6 +119,8 @@ public class VariantAnnotator extends RodWalker { private Collection indelBufferContext; + private Map> rodBindings = new HashMap>(); + private void listAnnotationsAndExit() { List> infoAnnotationClasses = new PluginManager(InfoFieldAnnotation.class).getPlugins(); @@ -154,10 +163,12 @@ public class VariantAnnotator extends RodWalker { logger.warn("There are no samples input at all; use the --sampleName argument to specify one if desired."); } + initializeRodBindingMap(); + if ( USE_ALL_ANNOTATIONS ) - engine = new VariantAnnotatorEngine(getToolkit()); + engine = new VariantAnnotatorEngine(getToolkit(), rodBindings); else - engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse); + engine = new VariantAnnotatorEngine(getToolkit(), annotationGroupsToUse, annotationsToUse, rodBindings); engine.initializeExpressions(expressionsToUse); // setup the header fields @@ -177,6 +188,13 @@ public class VariantAnnotator extends RodWalker { } } + private void initializeRodBindingMap() { + rodBindings.put(variantCollection.variants.getName(), variantCollection.variants); + rodBindings.put(snpEffFile.getName(), snpEffFile); + rodBindings.put(dbsnp.dbsnp.getName(), dbsnp.dbsnp); + rodBindings.put(comps.getName(), comps); + } + public static boolean isUniqueHeaderLine(VCFHeaderLine line, Set currentSet) { if ( !(line instanceof VCFCompoundHeaderLine) ) return true; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 78207cb86..cae9ab00c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -55,7 +56,7 @@ public class VariantAnnotatorEngine { private List requestedExpressions = new ArrayList(); private HashMap dbAnnotations = new HashMap(); - + private Map> rodBindings; private static class VAExpression { public String fullName, bindingName, fieldName; @@ -72,16 +73,18 @@ public class VariantAnnotatorEngine { } // use this constructor if you want all possible annotations - public VariantAnnotatorEngine(GenomeAnalysisEngine engine) { + public VariantAnnotatorEngine(GenomeAnalysisEngine engine, Map> rodBindings) { requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations(); requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations(); initializeDBs(engine); + this.rodBindings = rodBindings; } // use this constructor if you want to select specific annotations (and/or interfaces) - public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List annotationGroupsToUse, List annotationsToUse) { + public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List annotationGroupsToUse, List annotationsToUse, Map> rodBindings) { initializeAnnotations(annotationGroupsToUse, annotationsToUse); initializeDBs(engine); + this.rodBindings = rodBindings; } // select specific expressions to use @@ -137,7 +140,7 @@ public class VariantAnnotatorEngine { // go through all the requested info annotationTypes for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) { - Map annotationsFromCurrentType = annotationType.annotate(tracker, ref, stratifiedContexts, vc); + Map annotationsFromCurrentType = annotationType.annotate(tracker, rodBindings, ref, stratifiedContexts, vc); if ( annotationsFromCurrentType != null ) infoAnnotations.putAll(annotationsFromCurrentType); } @@ -202,7 +205,7 @@ public class VariantAnnotatorEngine { Map genotypeAnnotations = new HashMap(genotype.getAttributes()); for ( GenotypeAnnotation annotation : requestedGenotypeAnnotations ) { - Map result = annotation.annotate(tracker, ref, context, vc, genotype); + Map result = annotation.annotate(tracker, rodBindings, ref, context, vc, genotype); if ( result != null ) genotypeAnnotations.putAll(result); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index e982582ee..29d90b5bf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -13,7 +15,8 @@ import java.util.Map; public abstract class GenotypeAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts/genotype split by sample - public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); + public abstract Map annotate(RefMetaDataTracker tracker, Map> rodBindings, + ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g); // return the descriptions used for the VCF FORMAT meta field public abstract List getDescriptions(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 84438ccd8..63dea93d2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.List; @@ -13,7 +13,8 @@ import java.util.Map; public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts split by sample - public abstract Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc); + public abstract Map annotate(RefMetaDataTracker tracker, Map> rodBindings, + ReferenceContext ref, Map stratifiedContexts, VariantContext vc); // return the descriptions used for the VCF INFO meta field public abstract List getDescriptions(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 6836b14fc..07ba27639 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -138,7 +140,8 @@ public class UnifiedGenotyper extends LocusWalker>()); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples); // initialize the header From 0497170bc94f0d3c3df49ea2da3d1bd1b7ae562f Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 10 Aug 2011 12:56:31 -0400 Subject: [PATCH 183/186] SnpEffCodec now implements SelfScopingFeatureCodec so that we no longer have to specify the codec name on the command line for SnpEff files. --- .../utils/codecs/snpEff/SnpEffCodec.java | 19 ++++++++++++++++++- .../VariantAnnotatorIntegrationTest.java | 4 ++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index 827df16bb..eada8521f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -27,11 +27,16 @@ package org.broadinstitute.sting.utils.codecs.snpEff; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.TribbleException; +import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.LineReader; +import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; + import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; /** @@ -68,7 +73,7 @@ import java.io.IOException; * * @author David Roazen */ -public class SnpEffCodec implements FeatureCodec { +public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec { public static final int EXPECTED_NUMBER_OF_FIELDS = 23; public static final String FIELD_DELIMITER_PATTERN = "\\t"; @@ -255,4 +260,16 @@ public class SnpEffCodec implements FeatureCodec { } } } + + public boolean canDecode ( final File potentialInput ) { + try { + LineReader reader = new AsciiLineReader(new FileInputStream(potentialInput)); + readHeader(reader); + } + catch ( Exception e ) { + return false; + } + + return true; + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 596ac5c36..af29bd01f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -129,8 +129,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant:VCF " + - validationDataLocation + "1000G.exomes.vcf --snpEffFile:SnpEff " + validationDataLocation + + "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " + + validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation + "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000", 1, Arrays.asList("c08648a078368c80530bff004b3157f1") From 749c8bfbcd8b8e42a951bb1c6bcff69321d9eb53 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 10 Aug 2011 13:42:35 -0400 Subject: [PATCH 184/186] Moving more tools over to the new rod system --- .../walkers/annotator/VariantAnnotator.java | 7 +++-- .../coverage/CompareCallableLociWalker.java | 29 ++++++++++--------- .../gatk/walkers/indels/IndelRealigner.java | 2 +- .../indels/RealignerTargetCreator.java | 10 ++++++- .../phasing/MergeAndMatchHaplotypes.java | 14 +++++++-- .../walkers/phasing/PhaseByTransmission.java | 11 +++++-- ...pareCallableLociWalkerIntegrationTest.java | 2 +- .../UnifiedGenotyperPerformanceTest.java | 6 ++-- ...RealignerTargetCreatorIntegrationTest.java | 4 +-- ...RealignerTargetCreatorPerformanceTest.java | 4 +-- ...ergeAndMatchHaplotypesIntegrationTest.java | 4 +-- .../PhaseByTransmissionIntegrationTest.java | 2 +- 12 files changed, 60 insertions(+), 35 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index efa40d9a8..d26a81a06 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -50,12 +50,14 @@ import java.util.*; /** * Annotates variant calls with context information. Users can specify which of the available annotations to use. */ +@Requires(value={}) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @Reference(window=@Window(start=-50,stop=50)) @By(DataSource.REFERENCE) public class VariantAnnotator extends RodWalker { - @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) public RodBinding snpEffFile; @@ -65,7 +67,8 @@ public class VariantAnnotator extends RodWalker { * * rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. */ - @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); /** * A comparisons VCF file from which to annotate. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java index becbbeedf..cd5fdc505 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalker.java @@ -22,10 +22,11 @@ package org.broadinstitute.sting.gatk.walkers.coverage; -import org.broad.tribble.Feature; -import org.broad.tribble.bed.FullBEDFeature; +import org.broad.tribble.bed.BEDFeature; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -44,11 +45,11 @@ public class CompareCallableLociWalker extends RodWalker compTrack1; - @Argument(shortName="comp2", doc="First comparison track name", required=false) - protected String COMP2 = "comp2"; + @Input(fullName="comp2", shortName = "comp2", doc="Second comparison track name", required=true) + public RodBinding compTrack2; @Argument(shortName="printState", doc="If provided, prints sites satisfying this state pair", required=false) protected String printState = null; @@ -78,8 +79,8 @@ public class CompareCallableLociWalker extends RodWalker map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker != null ) { - CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, COMP1); - CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, COMP2); + CallableLociWalker.CallableBaseState comp1 = getCallableBaseState(tracker, compTrack1); + CallableLociWalker.CallableBaseState comp2 = getCallableBaseState(tracker, compTrack2); if ( printState != null && comp1.getState() == printState1 && comp2.getState() == printState2 ) { out.printf("%s %s %s %s%n", comp1.getLocation(), comp1.getState(), comp2.getLocation(), comp2.getState()); @@ -91,14 +92,14 @@ public class CompareCallableLociWalker extends RodWalker rodBinding) { //System.out.printf("tracker %s%n", tracker); - List bindings = tracker.getValues(Feature.class, track); - if ( bindings.size() != 1 || ! (bindings.get(0) instanceof FullBEDFeature)) { - throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", track)); + List bindings = tracker.getValues(rodBinding); + if ( bindings.size() != 1 ) { + throw new UserException.MalformedFile(String.format("%s track isn't a properly formated CallableBases object!", rodBinding.getName())); } - FullBEDFeature bed = (FullBEDFeature)bindings.get(0); + BEDFeature bed = bindings.get(0); GenomeLoc loc = getToolkit().getGenomeLocParser().createGenomeLoc(bed.getChr(), bed.getStart(), bed.getEnd()); CallableLociWalker.CalledState state = CallableLociWalker.CalledState.valueOf(bed.getName()); return new CallableLociWalker.CallableBaseState(getToolkit().getGenomeLocParser(),loc, state); @@ -128,7 +129,7 @@ public class CompareCallableLociWalker extends RodWalker { } @Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false) - public RodBinding known = RodBinding.makeUnbound(VariantContext.class); + public List> known = Collections.emptyList(); @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) protected String intervalsFile = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 22a10144f..fbb62f17e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.indels; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadCigarFilter; @@ -46,6 +48,8 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Collections; +import java.util.List; /** * Emits intervals for the Local Indel Realigner to target for cleaning. Ignores 454 reads, MQ0 reads, and reads with consecutive indel operators in the CIGAR string. @@ -56,9 +60,13 @@ import java.util.ArrayList; @By(DataSource.REFERENCE) @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) public class RealignerTargetCreator extends RodWalker { + @Output protected PrintStream out; + @Input(fullName="known", shortName = "known", doc="Input VCF file with known indels", required=false) + public List> known = Collections.emptyList(); + // mismatch/entropy/SNP arguments @Argument(fullName="windowSize", shortName="window", doc="window size for calculating entropy or SNP clusters", required=false) protected int windowSize = 10; @@ -110,7 +118,7 @@ public class RealignerTargetCreator extends RodWalker { @Output protected VCFWriter vcfWriter = null; + @Input(fullName="pbt", shortName = "pbt", doc="Input VCF truth file", required=true) + public RodBinding pbtTrack; + + @Input(fullName="rbp", shortName = "rbp", doc="Input VCF truth file", required=true) + public RodBinding rbpTrack; + private Map pbtCache = new HashMap(); private Map rbpCache = new HashMap(); @@ -31,7 +39,7 @@ public class MergeAndMatchHaplotypes extends RodWalker { public void initialize() { ArrayList rodNames = new ArrayList(); - rodNames.add("pbt"); + rodNames.add(pbtTrack.getName()); Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); @@ -44,8 +52,8 @@ public class MergeAndMatchHaplotypes extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - Collection pbts = tracker.getValues(VariantContext.class, "pbt", ref.getLocus()); - Collection rbps = tracker.getValues(VariantContext.class, "rbp", ref.getLocus()); + Collection pbts = tracker.getValues(pbtTrack, ref.getLocus()); + Collection rbps = tracker.getValues(rbpTrack, ref.getLocus()); VariantContext pbt = pbts.iterator().hasNext() ? pbts.iterator().next() : null; VariantContext rbp = rbps.iterator().hasNext() ? rbps.iterator().next() : null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 0178cdfaf..3eedc2a28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -1,7 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.phasing; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -31,13 +33,16 @@ import java.util.*; * begin. */ public class PhaseByTransmission extends RodWalker { + + @ArgumentCollection + protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + @Argument(shortName="f", fullName="familySpec", required=true, doc="Patterns for the family structure (usage: mom+dad=child). Specify several trios by supplying this argument many times and/or a file containing many patterns.") public ArrayList familySpecs = null; @Output protected VCFWriter vcfWriter = null; - private final String ROD_NAME = "variant"; private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP"; private final String SOURCE_NAME = "PhaseByTransmission"; @@ -102,7 +107,7 @@ public class PhaseByTransmission extends RodWalker { trios = getFamilySpecsFromCommandLineInput(familySpecs); ArrayList rodNames = new ArrayList(); - rodNames.add(ROD_NAME); + rodNames.add(variantCollection.variants.getName()); Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); @@ -289,7 +294,7 @@ public class PhaseByTransmission extends RodWalker { @Override public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (tracker != null) { - VariantContext vc = tracker.getFirstValue(VariantContext.class, ROD_NAME, context.getLocation()); + VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); Map genotypeMap = vc.getGenotypes(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java index 4a32d6701..1ba7a5e85 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CompareCallableLociWalkerIntegrationTest.java @@ -30,7 +30,7 @@ import org.testng.annotations.Test; import java.util.Arrays; public class CompareCallableLociWalkerIntegrationTest extends WalkerTest { - final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci -B:comp1,Bed " + validationDataLocation + "1kg_slx.chr1_10mb.callable.bed -B:comp2,Bed " + validationDataLocation + "ga2_slx.chr1_10mb.callable.bed -o %s"; + final static String commonArgs = "-R " + hg18Reference + " -T CompareCallableLoci --comp1:Bed " + validationDataLocation + "1kg_slx.chr1_10mb.callable.bed --comp2:Bed " + validationDataLocation + "ga2_slx.chr1_10mb.callable.bed -o %s"; @Test public void testCompareCallableLociWalker1() { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java index 86f8b14f1..fb7e84d22 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperPerformanceTest.java @@ -15,7 +15,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + - " -dbsnp:VCF " + b36dbSNP129 + + " --dbsnp:VCF " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -30,7 +30,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + - " -dbsnp:vcf " + b36dbSNP129 + + " --dbsnp:vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); @@ -46,7 +46,7 @@ public class UnifiedGenotyperPerformanceTest extends WalkerTest { " -glm BOTH" + " -L chr1:1-50,000,000" + " -nt 10" + - " -dbsnp:vcf " + b36dbSNP129 + + " --dbsnp:vcf " + b36dbSNP129 + " -o /dev/null", 0, new ArrayList(0)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index aabf01415..1873ccbe2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -17,13 +17,13 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test standard", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -B:dbsnp,vcf " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", + "-T RealignerTargetCreator --known " + b36dbSNP129 + " -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); executeTest("test dbsnp", spec2); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -R " + b36KGReference + " -B:indels,VCF " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI indels -o %s", + "-T RealignerTargetCreator -R " + b36KGReference + " --known " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -BTI known -o %s", 1, Arrays.asList("5206cee6c01b299417bf2feeb8b3dc96")); executeTest("test rods only", spec3); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java index 9490206c8..cc37cc191 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorPerformanceTest.java @@ -12,7 +12,7 @@ public class RealignerTargetCreatorPerformanceTest extends WalkerTest { WalkerTestSpec spec1 = new WalkerTestSpec( "-R " + hg18Reference + " -T RealignerTargetCreator" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -I " + evaluationDataLocation + "NA12878.GAII.chr1.50MB.bam" + " -L chr1:1-50,000,000" + " -o /dev/null", @@ -23,7 +23,7 @@ public class RealignerTargetCreatorPerformanceTest extends WalkerTest { WalkerTestSpec spec2 = new WalkerTestSpec( "-R " + hg18Reference + " -T RealignerTargetCreator" + - " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.hg18.vcf" + + " --known " + GATKDataLocation + "dbsnp_132.hg18.vcf" + " -I " + evaluationDataLocation + "NA12878.ESP.WEx.chr1.bam" + " -L " + evaluationDataLocation + "whole_exome_agilent_designed_120.targets.chr1.interval_list" + " -o /dev/null", diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java index 21435dd7d..cf6b4e581 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/MergeAndMatchHaplotypesIntegrationTest.java @@ -16,8 +16,8 @@ public class MergeAndMatchHaplotypesIntegrationTest extends WalkerTest { buildCommandLine( "-T MergeAndMatchHaplotypes", "-R " + b37KGReference, - "-B:pbt,VCF " + fundamentalTestPBTVCF, - "-B:rbp,VCF " + fundamentalTestRBPVCF, + "--pbt " + fundamentalTestPBTVCF, + "--rbp " + fundamentalTestRBPVCF, "-o %s" ), 1, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 59750e18f..c663c1dd7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -16,7 +16,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-T PhaseByTransmission", "-NO_HEADER", "-R " + b37KGReference, - "-B:variant,VCF " + fundamentalTestVCF, + "--variant " + fundamentalTestVCF, "-f NA12892+NA12891=NA12878", "-o %s" ), From 07ad8c78a9014d9fd4e652904974d6f975def6fe Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 10 Aug 2011 14:24:40 -0400 Subject: [PATCH 185/186] More tools moved over. Fixed the VariantContextIntegrationTest which was not useful because the md5s were all removed. In the future, instead of removing md5s (putting it in 'parameterization' mode), you should instead use @Test{enabled=false} since it's easier to track. --- .../walkers/genotyper/UGCallVariants.java | 20 +++++++--------- .../walkers/qc/RodSystemValidationWalker.java | 7 +++++- .../variantutils/ValidateVariants.java | 8 +++++-- .../SelectVariantsIntegrationTest.java | 4 ++-- .../ValidateVariantsIntegrationTest.java | 2 +- .../VariantContextIntegrationTest.java | 23 +++++++++---------- 6 files changed, 34 insertions(+), 30 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java index d91f8d2e4..500b11360 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java @@ -25,7 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -51,6 +53,9 @@ public class UGCallVariants extends RodWalker { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); + @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true) + public List> variants; + // control the output @Output(doc="File to which variants should be written",required=true) protected VCFWriter writer = null; @@ -63,13 +68,8 @@ public class UGCallVariants extends RodWalker { public void initialize() { - for ( ReferenceOrderedDataSource d : getToolkit().getRodDataSources() ) { - if ( d.getName().startsWith("variant") ) - trackNames.add(d.getName()); - } - if ( trackNames.size() == 0 ) - throw new UserException("At least one track bound to a name beginning with 'variant' must be provided."); - + for ( RodBinding rb : variants ) + trackNames.add(rb.getName()); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), trackNames); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples); @@ -93,11 +93,7 @@ public class UGCallVariants extends RodWalker { if ( tracker == null ) return null; - List VCs = new ArrayList(); - for ( String name : trackNames ) { - VariantContext vc = tracker.getFirstValue(VariantContext.class, name, context.getLocation()); - VCs.add(vc); - } + List VCs = tracker.getValues(variants, context.getLocation()); VariantContext mergedVC = mergeVCsWithGLs(VCs); if ( mergedVC == null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java index edfaea768..1c24f3879 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/RodSystemValidationWalker.java @@ -25,7 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.qc; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; @@ -54,6 +56,9 @@ public class RodSystemValidationWalker extends RodWalker { // the divider to use in some of the text output private static final String DIVIDER = ","; + @Input(fullName="eval", shortName = "eval", doc="Input VCF eval file", required=true) + public List> eval; + @Output public PrintStream out; @@ -108,7 +113,7 @@ public class RodSystemValidationWalker extends RodWalker { // if the argument was set, check for equivalence if (allRecordsVariantContextEquivalent && tracker != null) { - Collection col = tracker.getValues(VariantContext.class); + Collection col = tracker.getValues(eval); VariantContext con = null; for (VariantContext contextInList : col) if (con == null) con = contextInList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index c3e7dbe0c..5c7fb268c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -29,6 +29,7 @@ import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -55,6 +56,9 @@ public class ValidateVariants extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + public enum ValidationType { ALL, REF, IDS, ALLELES, CHR_COUNTS } @@ -137,8 +141,8 @@ public class ValidateVariants extends RodWalker { // get the RS IDs Set rsIDs = null; - if ( tracker.hasValues(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - List dbsnpList = tracker.getValues(Feature.class, DbSNPHelper.STANDARD_DBSNP_TRACK_NAME); + if ( tracker.hasValues(dbsnp.dbsnp) ) { + List dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus()); rsIDs = new HashSet(); for ( Object d : dbsnpList ) { if (d instanceof DbSNPFeature ) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index b2ac3f4a6..bec0d5dd4 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -55,7 +55,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "NA12878.hg19.example1.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant:VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a") ); @@ -68,7 +68,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String testFile = validationDataLocation + "combine.3.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + b36KGReference + " -sn NA12892 -B:variant,VCF " + testFile + " -o %s -NO_HEADER", + "-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER", 1, Arrays.asList("") ); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 4d5f0359d..adf3b21a8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -95,7 +95,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { @Test public void testBadID() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString("validationExampleBad.vcf", "IDS") + " -B:dbsnp,vcf " + b36dbSNP129, + baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, 0, UserException.MalformedFile.class ); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java index 7cdb6af95..67fe7d012 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextIntegrationTest.java @@ -15,8 +15,7 @@ public class VariantContextIntegrationTest extends WalkerTest { " -R " + b36KGReference; private static String root = cmdRoot + - " -L 1:1-1,000,000 -B:dbsnp,vcf " + b36dbSNP129 + - " -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf"; + " -L 1:1-1,000,000 -V " + b36dbSNP129; private static final class VCITTest extends TestDataProvider { String args, md5; @@ -30,15 +29,15 @@ public class VariantContextIntegrationTest extends WalkerTest { @DataProvider(name = "VCITTestData") public Object[][] createVCITTestData() { - new VCITTest("--printPerLocus", ""); - new VCITTest("--printPerLocus --onlyContextsOfType SNP", ""); - new VCITTest("--printPerLocus --onlyContextsOfType INDEL", ""); - new VCITTest("--printPerLocus --onlyContextsOfType MIXED", ""); - new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", ""); - new VCITTest("--printPerLocus --takeFirstOnly", ""); - new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", ""); - new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", ""); - new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", ""); + new VCITTest("--printPerLocus", "e9d0f1fe80659bb55b40aa6c3a2e921e"); + new VCITTest("--printPerLocus --onlyContextsOfType SNP", "0e620db3e45771df42c54a9c0ae4a29f"); + new VCITTest("--printPerLocus --onlyContextsOfType INDEL", "b725c204fefe3814644d50e7c20f9dfe"); + new VCITTest("--printPerLocus --onlyContextsOfType MIXED", "3ccc33f496a1718df55722d11cc14334"); + new VCITTest("--printPerLocus --onlyContextsOfType NO_VARIATION", "39335acdb34c8a2af433dc50d619bcbc"); + new VCITTest("--printPerLocus --takeFirstOnly", "3a45561da042b2b44b6a679744f16103"); + new VCITTest("--printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "4746f269ecc377103f83eb61cc162c39"); + new VCITTest("--printPerLocus --onlyContextsStartinAtCurrentPosition", "2749e3fae458650a85a2317e346dc44c"); + new VCITTest("--printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "9bd48c2a40813023e29ffaa23d59d382"); return VCITTest.getTests(VCITTest.class); } @@ -58,7 +57,7 @@ public class VariantContextIntegrationTest extends WalkerTest { public void testToVCF() { // this really just tests that we are seeing the same number of objects over all of chr1 - WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -B:vcf,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", + WalkerTestSpec spec = new WalkerTestSpec( cmdRoot + " -NO_HEADER -V:VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.500.vcf -L 1:1-1000000 -o %s --outputVCF %s", 2, // just one output file Arrays.asList("e3c35d0c4b5d4935c84a270f9df0951f", "ff91731213fd0bbdc200ab6fd1c93e63")); executeTest("testToVCF", spec); From b0ff5b1ff7995e819f2fd14c9eef85d06f1d436a Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 10 Aug 2011 16:16:53 -0400 Subject: [PATCH 186/186] a better name for the pacbio processing pipeline --- ...alibrateBaseQualities.scala => PacbioProcessingPipeline.scala} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename public/scala/qscript/org/broadinstitute/sting/queue/qscripts/{RecalibrateBaseQualities.scala => PacbioProcessingPipeline.scala} (100%) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala similarity index 100% rename from public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala rename to public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala