diff --git a/public/java/src/org/broadinstitute/sting/commandline/Advanced.java b/public/java/src/org/broadinstitute/sting/commandline/Advanced.java new file mode 100644 index 000000000..7aeefe261 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/commandline/Advanced.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.commandline; + +import java.lang.annotation.*; + +/** + * Indicates that a walker argument should is considered an advanced option. + * + * @author Mark DePristo + * @version 0.1 + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.TYPE,ElementType.FIELD}) +public @interface Advanced { +} diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java index e0e2ac378..8ec0d650a 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java @@ -151,6 +151,14 @@ public class ArgumentSource { return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class); } + /** + * Is the given argument considered an advanced option when displaying on the command-line argument system. + * @return True if so. False otherwise. + */ + public boolean isAdvanced() { + return field.isAnnotationPresent(Advanced.class); + } + /** * Is this command-line argument dependent on some primitive argument types? * @return True if this command-line argument depends on other arguments; false otherwise. diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 02af884a2..b12ae8e75 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -325,7 +325,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) { - Class parameterType = getParameterizedTypeClass(type); + Class parameterType = JVMUtils.getParameterizedTypeClass(type); return RodBinding.makeUnbound((Class)parameterType); } @@ -338,6 +338,8 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); String value = getArgumentValue( defaultDefinition, matches ); + Class parameterType = JVMUtils.getParameterizedTypeClass(type); + try { String name = defaultDefinition.fullName; String tribbleType = null; @@ -373,18 +375,18 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { if ( featureDescriptor != null ) { tribbleType = featureDescriptor.getName(); logger.warn("Dynamically determined type of " + file + " to be " + tribbleType); - } else { - throw new UserException.CommandLineException( - String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + - "Please add an explicit type tag :TYPE listing the correct type from among the supported types: %s", - manager.userFriendlyListOfAvailableFeatures())); } } + + if ( tribbleType == null ) + throw new UserException.CommandLineException( + String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " + + "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", + manager.userFriendlyListOfAvailableFeatures(parameterType))); } } Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); - Class parameterType = getParameterizedTypeClass(type); RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); parsingEngine.addTags(result,tags); parsingEngine.addRodBinding(result); @@ -399,16 +401,6 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { value, source.field.getName(), e.getMessage())); } } - - private Class getParameterizedTypeClass(Type t) { - if ( t instanceof ParameterizedType ) { - ParameterizedType parameterizedType = (ParameterizedType)t; - if ( parameterizedType.getActualTypeArguments().length != 1 ) - throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t); - return (Class)parameterizedType.getActualTypeArguments()[0]; - } else - throw new ReviewedStingException("BUG: could not find generic type on class " + t); - } } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 8a13dadbf..b8488dc9a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.filters.ReadFilter; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.walkers.Attribution; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -49,7 +50,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "GATK Engine", summary = "Features and arguments for the GATK engine itself, available to all walkers.", - extraDocs = { ReadFilter.class, UserException.class }) + extraDocs = { UserException.class }) public class CommandLineGATK extends CommandLineExecutable { @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") private String analysisName = null; @@ -174,6 +175,10 @@ public class CommandLineGATK extends CommandLineExecutable { StringBuilder additionalHelp = new StringBuilder(); Formatter formatter = new Formatter(additionalHelp); + formatter.format("Available Reference Ordered Data types:%n"); + formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures()); + formatter.format("%n"); + formatter.format("For a full description of this walker, see its GATKdocs at:%n"); formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java index cd77a9e7e..4ec451567 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java @@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord; * Filter out FailsVendorQualityCheck reads. */ -public class FailsVendorQualityCheckReadFilter extends ReadFilter { +public class FailsVendorQualityCheckFilter extends ReadFilter { public boolean filterOut( final SAMRecord read ) { return read.getReadFailsVendorQualityCheckFlag(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java index 75369b306..ed9c37dca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument; * @version 0.1 */ -public class MappingQualityReadFilter extends ReadFilter { +public class MappingQualityFilter extends ReadFilter { @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false) public int MIN_MAPPING_QUALTY_SCORE = 10; diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java index 1afec36d1..ccdb40d31 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils; * @version 0.1 */ -public class MappingQualityUnavailableReadFilter extends ReadFilter { +public class MappingQualityUnavailableFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java index e49d4117c..57db8419c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java @@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord; * @version 0.1 */ -public class MappingQualityZeroReadFilter extends ReadFilter { +public class MappingQualityZeroFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { return (rec.getMappingQuality() == 0); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java rename to public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java index 31c2144ce..50cd30f71 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java @@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord; * Filter out duplicate reads. */ -public class NotPrimaryAlignmentReadFilter extends ReadFilter { +public class NotPrimaryAlignmentFilter extends ReadFilter { public boolean filterOut( final SAMRecord read ) { return read.getNotPrimaryAlignmentFlag(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index 26a400071..c99aea254 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -36,7 +36,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.GATKDocUtils; +import org.broadinstitute.sting.utils.help.HelpUtils; +import javax.mail.Header; import java.io.File; import java.util.*; @@ -50,7 +53,7 @@ import java.util.*; * @author depristo */ public class FeatureManager { - public static class FeatureDescriptor { + public static class FeatureDescriptor implements Comparable { final String name; final FeatureCodec codec; @@ -62,6 +65,7 @@ public class FeatureManager { public String getName() { return name; } + public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); } public FeatureCodec getCodec() { return codec; } @@ -70,13 +74,18 @@ public class FeatureManager { @Override public String toString() { - return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName()); + return String.format("FeatureDescriptor name=%s codec=%s feature=%s", + getName(), getCodecClass().getName(), getFeatureClass().getName()); + } + + @Override + public int compareTo(FeatureDescriptor o) { + return getName().compareTo(o.getName()); } } private final PluginManager pluginManager; - private final Collection featureDescriptors = new HashSet(); - + private final Collection featureDescriptors = new TreeSet(); /** * Construct a FeatureManager @@ -114,7 +123,7 @@ public class FeatureManager { */ @Requires("featureClass != null") public Collection getByFeature(Class featureClass) { - Set consistentDescriptors = new HashSet(); + Set consistentDescriptors = new TreeSet(); if (featureClass == null) throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object"); @@ -189,10 +198,40 @@ public class FeatureManager { */ @Ensures("result != null") public String userFriendlyListOfAvailableFeatures() { - List names = new ArrayList(); - for ( final FeatureDescriptor descriptor : featureDescriptors ) - names.add(descriptor.getName()); - return Utils.join(",", names); + return userFriendlyListOfAvailableFeatures(Feature.class); + } + + /** + * Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load + * restricted to only Codecs producting Features consistent with the requiredFeatureType + * @return + */ + @Ensures("result != null") + public String userFriendlyListOfAvailableFeatures(Class requiredFeatureType) { + final String nameHeader="Name", featureHeader = "FeatureType", docHeader="Documentation"; + + int maxNameLen = nameHeader.length(), maxFeatureNameLen = featureHeader.length(); + for ( final FeatureDescriptor descriptor : featureDescriptors ) { + if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) { + maxNameLen = Math.max(maxNameLen, descriptor.getName().length()); + maxFeatureNameLen = Math.max(maxFeatureNameLen, descriptor.getSimpleFeatureName().length()); + } + } + + StringBuilder docs = new StringBuilder(); + String format = "%" + maxNameLen + "s %" + maxFeatureNameLen + "s %s%n"; + docs.append(String.format(format, nameHeader, featureHeader, docHeader)); + for ( final FeatureDescriptor descriptor : featureDescriptors ) { + if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) { + String oneDoc = String.format(format, + descriptor.getName(), + descriptor.getSimpleFeatureName(), + GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass())); + docs.append(oneDoc); + } + } + + return docs.toString(); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java index 4bfedb672..e2db1dc52 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.utils.GenomeLoc; @@ -17,7 +17,7 @@ import java.util.Set; * To change this template use File | Settings | File Templates. */ @Requires({DataSource.READS,DataSource.REFERENCE}) -@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class}) +@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class}) public abstract class DuplicateWalker extends Walker { // Do we actually want to operate on the context? public boolean filter(GenomeLoc loc, AlignmentContext context, Set> readSets ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index b0b2687f4..8152f74c2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; -import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter; -import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; +import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter; +import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @By(DataSource.READS) @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @PartitionBy(PartitionType.INTERVAL) -@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class}) +@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 83a8ce7d7..70f3c6a1a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -44,7 +44,9 @@ import java.util.Set; public abstract class AlleleFrequencyCalculationModel implements Cloneable { public enum Model { + /** The default model with the best performance in all cases */ EXACT, + /** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */ GRID_SEARCH } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 594c1dd28..60dfe4fe7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -53,7 +53,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { } public enum GENOTYPING_MODE { + /** the default; the Unified Genotyper will choose the most likely alternate allele */ DISCOVERY, + /** only the alleles passed in from a VCF rod bound to the -alleles argument will be used for genotyping */ GENOTYPE_GIVEN_ALLELES } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 1a76bfd07..e7f89bf08 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -36,31 +36,54 @@ import java.io.File; public class UnifiedArgumentCollection { - // control the various models to be used @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; + /** + * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. + */ @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; + /** + * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: + * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2 + */ @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY; @Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false) public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE; + /** + * Specifies how to determine the alternate allele to use for genotyping + */ @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false) public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; + /** + * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with + * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this + * is the default). Note that the confidence (QUAL) values for multi-sample low-pass (e.g. 4x per sample) calling might + * be significantly smaller with the new EXACT model than with our older GRID_SEARCH model, as the latter tended to + * over-estimate the confidence; for low-pass calling we tend to use much smaller thresholds (e.g. 4). + */ @Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false) public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0; + /** + * the minimum phred-scaled Qscore threshold to emit low confidence calls. Genotypes with confidence >= this but less + * than the calling threshold are emitted but marked as filtered. + */ @Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false) public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0; + /** + * This argument is not enabled by default because it increases the runtime by an appreciable amount. + */ @Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false) public boolean COMPUTE_SLOD = false; @@ -80,7 +103,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "abort_at_too_much_coverage", doc = "Don't call a site if the downsampled coverage is greater than this value", required = false) public int COVERAGE_AT_WHICH_TO_ABORT = -1; - // control the various parameters to be used @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false) public int MIN_BASE_QUALTY_SCORE = 17; @@ -91,11 +113,17 @@ public class UnifiedArgumentCollection { @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) public Double MAX_DELETION_FRACTION = 0.05; - // indel-related arguments + /** + * A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site. + * Decreasing this value will increase sensitivity but at the cost of larger calling time and a larger number of false positives. + */ @Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false) public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5; + /** + * This argument informs the prior probability of having an indel at a site. + */ @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false) public double INDEL_HETEROZYGOSITY = 1.0/8000; @@ -126,22 +154,23 @@ public class UnifiedArgumentCollection { @Hidden @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false) public boolean OUTPUT_DEBUG_INDEL_INFO = false; + @Hidden @Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false) public boolean dovit = false; + @Hidden @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false) public boolean GSA_PRODUCTION_ONLY = false; + @Hidden - @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false) public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL; @Hidden - @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) + @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; - @Deprecated @Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false) private Boolean ALL_BASES_DEPRECATED = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index d31bb6fb9..d5dbdedd6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadMateFilter; -import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; @@ -45,13 +45,73 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.*; - /** - * A variant caller which unifies the approaches of several disparate callers. Works for single-sample and - * multi-sample data. The user can choose from several different incorporated calculation models. + * A variant caller which unifies the approaches of several disparate callers -- Works for single-sample and multi-sample data. + * + *

+ * The GATK Unified Genotyper is a multiple-sample, technology-aware SNP and indel caller. It uses a Bayesian genotype + * likelihood model to estimate simultaneously the most likely genotypes and allele frequency in a population of N samples, + * emitting an accurate posterior probability of there being a segregating variant allele at each locus as well as for the + * genotype of each sample. The system can either emit just the variant sites or complete genotypes (which includes + * homozygous reference calls) satisfying some phred-scaled confidence value. The genotyper can make accurate calls on + * both single sample data and multi-sample data. + * + *

Input

+ *

+ * The read data from which to make variant calls. + *

+ * + *

Output

+ *

+ * A raw, unfiltered, highly specific callset in VCF format. + *

+ * + *

Example generic command for multi-sample SNP calling

+ *
+ * java -jar GenomeAnalysisTK.jar \
+ *   -R resources/Homo_sapiens_assembly18.fasta \
+ *   -T UnifiedGenotyper \
+ *   -I sample1.bam [-I sample2.bam ...] \
+ *   --dbsnp dbSNP.vcf \
+ *   -o snps.raw.vcf \
+ *   -stand_call_conf [50.0] \
+ *   -stand_emit_conf 10.0 \
+ *   -dcov [50] \
+ *   [-L targets.interval_list]
+ * 
+ * + *

+ * The above command will call all of the samples in your provided BAM files [-I arguments] together and produce a VCF file + * with sites and genotypes for all samples. The easiest way to get the dbSNP file is from the GATK resource bundle. Several + * arguments have parameters that should be chosen based on the average coverage per sample in your data. See the detailed + * argument descriptions below. + *

+ * + *

Example command for generating calls at all sites

+ *
+ * java -jar /path/to/GenomeAnalysisTK.jar \
+ *   -l INFO \
+ *   -R resources/Homo_sapiens_assembly18.fasta \
+ *   -T UnifiedGenotyper \
+ *   -I /DCC/ftp/pilot_data/data/NA12878/alignment/NA12878.SLX.maq.SRP000031.2009_08.bam \
+ *   -o my.vcf \
+ *   --output_mode EMIT_ALL_SITES
+ * 
+ * + *

Caveats

+ *
    + *
  • The system is under active and continuous development. All outputs, the underlying likelihood model, arguments, and + * file formats are likely to change.
  • + *
  • The system can be very aggressive in calling variants. In the 1000 genomes project for pilot 2 (deep coverage of ~35x) + * we expect the raw Qscore > 50 variants to contain at least ~10% FP calls. We use extensive post-calling filters to eliminate + * most of these FPs. Variant Quality Score Recalibration is a tool to perform this filtering.
  • + *
  • We only handle diploid genotypes
  • + *
+ * */ + @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) -@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} ) +@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} ) @Reference(window=@Window(start=-200,stop=200)) @By(DataSource.REFERENCE) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) @@ -61,10 +121,9 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; } @@ -72,7 +131,9 @@ public class UnifiedGenotyper extends LocusWalker> getCompRodBindings() { return Collections.emptyList(); } public List> getResourceRodBindings() { return Collections.emptyList(); } - // control the output + /** + * A raw, unfiltered, highly specific callset in VCF format. + */ @Output(doc="File to which variants should be written",required=true) protected VCFWriter writer = null; @@ -82,9 +143,15 @@ public class UnifiedGenotyper extends LocusWalker annotationsToUse = new ArrayList(); + /** + * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. + */ @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) protected String[] annotationClassesToUse = { "Standard" }; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index b3f77fc06..06455df6d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -51,8 +51,11 @@ public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public enum OUTPUT_MODE { + /** the default */ EMIT_VARIANTS_ONLY, + /** include confident reference sites */ EMIT_ALL_CONFIDENT_SITES, + /** any callable site regardless of confidence */ EMIT_ALL_SITES } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 129be7f55..8680f3537 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -178,6 +178,7 @@ public class IndelRealigner extends ReadWalker { * will only proceed with the realignment (even above the given threshold) if it minimizes entropy among the reads (and doesn't simply * push the mismatch column to another position). This parameter is just a heuristic and should be adjusted based on your particular data set. */ + @Advanced @Argument(fullName="entropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false) protected double MISMATCH_THRESHOLD = 0.15; @@ -185,30 +186,35 @@ public class IndelRealigner extends ReadWalker { * For expert users only! To minimize memory consumption you can lower this number (but then the tool may skip realignment on regions with too much coverage; * and if the number is too low, it may generate errors during realignment). Just make sure to give Java enough memory! 4Gb should be enough with the default value. */ + @Advanced @Argument(fullName="maxReadsInMemory", shortName="maxInMemory", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter", required=false) protected int MAX_RECORDS_IN_MEMORY = 150000; /** * For expert users only! */ + @Advanced @Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false) protected int MAX_ISIZE_FOR_MOVEMENT = 3000; /** * For expert users only! */ + @Advanced @Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="maximum positional move in basepairs that a read can be adjusted during realignment", required=false) protected int MAX_POS_MOVE_ALLOWED = 200; /** * For expert users only! If you need to find the optimal solution regardless of running time, use a higher number. */ + @Advanced @Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false) protected int MAX_CONSENSUSES = 30; /** * For expert users only! If you need to find the optimal solution regardless of running time, use a higher number. */ + @Advanced @Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false) protected int MAX_READS_FOR_CONSENSUSES = 120; @@ -216,9 +222,11 @@ public class IndelRealigner extends ReadWalker { * For expert users only! If this value is exceeded at a given interval, realignment is not attempted and the reads are passed to the output file(s) as-is. * If you need to allow more reads (e.g. with very deep coverage) regardless of memory, use a higher number. */ + @Advanced @Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment", required=false) protected int MAX_READS = 20000; + @Advanced @Argument(fullName="noOriginalAlignmentTags", shortName="noTags", required=false, doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam") protected boolean NO_ORIGINAL_ALIGNMENT_TAGS = false; @@ -226,6 +234,7 @@ public class IndelRealigner extends ReadWalker { * For expert users only! This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception. * Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory. */ + @Advanced @Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false, doc="The target intervals are not sorted") protected boolean TARGET_NOT_SORTED = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 49916e1a2..bede50a0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.BadCigarFilter; import org.broadinstitute.sting.gatk.filters.BadMateFilter; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; @@ -98,7 +98,7 @@ import java.util.List; * * @author ebanks */ -@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class}) +@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, BadCigarFilter.class}) @Reference(window=@Window(start=-1,stop=50)) @Allows(value={DataSource.READS, DataSource.REFERENCE}) @By(DataSource.REFERENCE) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 9f6ac2a91..546bbe1a6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter; import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper; @@ -78,7 +78,7 @@ import java.util.*; * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. */ -@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class}) +@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class}) public class SomaticIndelDetectorWalker extends ReadWalker { // @Output // PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 34c7912d9..17a6e20f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.sample.Sample; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -91,7 +91,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr @By(DataSource.READS) // Filter out all reads with zero mapping quality -@ReadFilters({MappingQualityZeroReadFilter.class}) +@ReadFilters({MappingQualityZeroFilter.class}) public class ReadBackedPhasingWalker extends RodWalker { private static final boolean DEBUG = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 5ffc61fe3..98c8950e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -29,8 +29,8 @@ import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; +import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -68,6 +68,8 @@ import java.util.Map; * *

Input

*

+ * The input read data whose base quality scores need to be assessed. + *

* A database of known polymorphic sites to skip over. *

* @@ -95,7 +97,7 @@ import java.util.Map; @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file -@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality +@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality @Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta @PartitionBy(PartitionType.LOCUS) public class CountCovariatesWalker extends LocusWalker implements TreeReducible { @@ -134,6 +136,10 @@ public class CountCovariatesWalker extends LocusWalkerInput *

+ * The input read data whose base quality scores need to be recalibrated. + *

* The recalibration table file in CSV format that was generated by the CountCovariates walker. *

* diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index d81a57aad..517c2362a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -157,6 +157,10 @@ public class VariantRecalibrator extends RodWalker alleles = new HashSet(); - alleles.add(ref); - if ( alt != null ) - alleles.add(alt); - - HashMap attrs = new HashMap(); - String id = array[array.length - 1]; - if ( id.indexOf("dbsnp") != -1 ) { - attrs.put(VariantContext.ID_KEY, parseID(id)); - } - - // create a new feature given the array - return new VariantContext("CGI", array[3], start, end, alleles, VariantContext.NO_NEG_LOG_10PERROR, null, attrs); - } - - public Class getFeatureType() { - return VariantContext.class; - } - - // There's no spec and no character to distinguish header lines... - private final static int NUM_HEADER_LINES = 12; - public Object readHeader(LineReader reader) { - return null; - - //String headerLine = null; - //try { - // for (int i = 0; i < NUM_HEADER_LINES; i++) - // headerLine = reader.readLine(); - //} catch (IOException e) { - // throw new IllegalArgumentException("Unable to read a line from the line reader"); - //} - //return headerLine; - } - - private static final Pattern DBSNP_PATTERN = Pattern.compile("^dbsnp\\.\\d+:(.*)"); - private String parseID(String raw) { - StringBuilder sb = null; - - String[] ids = raw.split(";"); - for ( String id : ids ) { - Matcher matcher = DBSNP_PATTERN.matcher(id); - if ( matcher.matches() ) { - String rsID = matcher.group(1); - if ( sb == null ) { - sb = new StringBuilder(rsID); - } else { - sb.append(";"); - sb.append(rsID); - } - } - } - - return sb == null ? null : sb.toString(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java deleted file mode 100755 index 284c43e90..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java +++ /dev/null @@ -1,209 +0,0 @@ -package org.broadinstitute.sting.utils.codecs.soapsnp; - -import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; -import org.broad.tribble.NameAwareCodec; -import org.broad.tribble.TribbleException; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.LineReader; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.*; - -/** - * @author depristo - *

- * a codec for parsing soapsnp files (see http://soap.genomics.org.cn/soapsnp.html#usage2) - *

- * - * A simple text file format with the following whitespace separated fields: - * -1) Chromosome ID -2) Coordinate on chromosome, start from 1 -3) Reference genotype -4) Consensus genotype -5) Quality score of consensus genotype -6) Best base -7) Average quality score of best base -8) Count of uniquely mapped best base -9) Count of all mapped best base -10) Second best bases -11) Average quality score of second best base -12) Count of uniquely mapped second best base -13) Count of all mapped second best base -14) Sequencing depth of the site -15) Rank sum test p_value -16) Average copy number of nearby region -17) Whether the site is a dbSNP. - */ -public class SoapSNPCodec implements FeatureCodec, NameAwareCodec { - private String[] parts; - - // we store a name to give to each of the variant contexts we emit - private String name = "Unknown"; - - public Feature decodeLoc(String line) { - return decode(line); - } - - /** - * Decode a line as a Feature. - * - * @param line - * - * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is - * a comment) - */ - public Feature decode(String line) { - try { - // parse into lines - parts = line.trim().split("\\s+"); - - // check that we got the correct number of tokens in the split - if (parts.length != 18) - throw new CodecLineParsingException("Invalid SoapSNP row found -- incorrect element count. Expected 18, got " + parts.length + " line = " + line); - - String contig = parts[0]; - long start = Long.valueOf(parts[1]); - AlleleAndGenotype allelesAndGenotype = parseAlleles(parts[2], parts[3], line); - - double negLog10PError = Integer.valueOf(parts[4]) / 10.0; - - Map attributes = new HashMap(); - attributes.put("BestBaseQ", parts[6]); - attributes.put("SecondBestBaseQ", parts[10]); - attributes.put("RankSumP", parts[15]); - // add info to keys - - //System.out.printf("Alleles = " + allelesAndGenotype.alleles); - //System.out.printf("genotype = " + allelesAndGenotype.genotype); - - VariantContext vc = new VariantContext(name, contig, start, start, allelesAndGenotype.alleles, allelesAndGenotype.genotype, negLog10PError, VariantContext.PASSES_FILTERS, attributes); - - //System.out.printf("line = %s%n", line); - //System.out.printf("vc = %s%n", vc); - - return vc; - } catch (CodecLineParsingException e) { - throw new TribbleException("Unable to parse line " + line,e); - } catch (NumberFormatException e) { - e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. - throw new TribbleException("Unable to parse line " + line,e); - } - } - - private static class AlleleAndGenotype { - Collection alleles; - Collection genotype; - - public AlleleAndGenotype(Collection alleles, Genotype genotype) { - this.alleles = alleles; - this.genotype = new HashSet(); - this.genotype.add(genotype); - } - } - - private AlleleAndGenotype parseAlleles(String ref, String consensusGenotype, String line) { - /* A Adenine - C Cytosine - G Guanine - T (or U) Thymine (or Uracil) - R A or G - Y C or T - S G or C - W A or T - K G or T - M A or C - B C or G or T - D A or G or T - H A or C or T - V A or C or G - N any base - . or - gap - */ - if ( ref.equals(consensusGenotype) ) - throw new TribbleException.InternalCodecException("Ref base and consensus genotype are the same " + ref); - - Allele refAllele = Allele.create(ref, true); - List genotypeAlleles = null; - - char base = consensusGenotype.charAt(0); - - switch ( base ) { - case 'A': case 'C': case 'G': case 'T': - Allele a = Allele.create(consensusGenotype); - genotypeAlleles = Arrays.asList(a, a); - break; - case 'R': case 'Y': case 'S': case 'W': case 'K': case 'M': - genotypeAlleles = determineAlt(refAllele, ref.charAt(0), base); - break; - default: - throw new TribbleException("Unexpected consensus genotype " + consensusGenotype + " at line = " + line); - } - - - Collection alleles = new HashSet(genotypeAlleles); - alleles.add(refAllele); - Genotype genotype = new Genotype("unknown", genotypeAlleles); // todo -- probably should include genotype quality - - return new AlleleAndGenotype( alleles, genotype ); - } - - private static final Map IUPAC_SNPS = new HashMap(); - static { - IUPAC_SNPS.put('R', "AG"); - IUPAC_SNPS.put('Y', "CT"); - IUPAC_SNPS.put('S', "GC"); - IUPAC_SNPS.put('W', "AT"); - IUPAC_SNPS.put('K', "GT"); - IUPAC_SNPS.put('M', "AC"); - } - - private List determineAlt(Allele ref, char refbase, char alt) { - String alts = IUPAC_SNPS.get(alt); - if ( alts == null ) - throw new IllegalStateException("BUG: unexpected consensus genotype " + alt); - - Allele a1 = alts.charAt(0) == refbase ? ref : Allele.create((byte)alts.charAt(0)); - Allele a2 = alts.charAt(1) == refbase ? ref : Allele.create((byte)alts.charAt(1)); - - //if ( a1 != ref && a2 != ref ) - // throw new IllegalStateException("BUG: unexpected consensus genotype " + alt + " does not contain the reference base " + ref); - - return Arrays.asList(a1, a2); - } - - /** - * @return VariantContext - */ - public Class getFeatureType() { - return VariantContext.class; - } - - public Object readHeader(LineReader reader) { - - return null; // we don't have a meaningful header - } - - /** - * get the name of this codec - * @return our set name - */ - public String getName() { - return name; - } - - /** - * set the name of this codec - * @param name new name - */ - public void setName(String name) { - this.name = name; - } - - public static void main(String[] args) { - System.out.printf("Testing " + args[0]); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java index 710503ca8..89163dfcb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java @@ -39,6 +39,5 @@ public @interface DocumentedGATKFeature { public boolean enable() default true; public String groupName(); public String summary() default ""; - public Class handler() default GenericDocumentationHandler.class; public Class[] extraDocs() default {}; } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java new file mode 100644 index 000000000..66354202f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.help; + +/** + * Documentation unit. Effectively a class version of the DocumentedGATKFeature + * + * @author depristo + */ +class DocumentedGATKFeatureObject { + private final Class classToDoc; + private final boolean enable; + private final String groupName, summary; + private final Class[] extraDocs; + + public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs) { + this.classToDoc = classToDoc; + this.enable = enable; + this.groupName = groupName; + this.summary = summary; + this.extraDocs = extraDocs; + } + + public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary) { + this(classToDoc, true, groupName, summary, new Class[]{}); + } + + public Class getClassToDoc() { return classToDoc; } + public boolean enable() { return enable; } + public String groupName() { return groupName; } + public String summary() { return summary; } + public Class[] extraDocs() { return extraDocs; } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java index e2909cf15..983805c4d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java @@ -36,9 +36,9 @@ public class GATKDocUtils { public static String helpLinksToGATKDocs(Class c) { String classPath = htmlFilenameForClass(c); StringBuilder b = new StringBuilder(); - b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); - b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); - b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); + b.append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath); + //b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); + //b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); return b.toString(); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java index 1f6db2757..41c855329 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java @@ -51,7 +51,7 @@ class GATKDocWorkUnit implements Comparable { /** The javadoc documentation for clazz */ final ClassDoc classDoc; /** The annotation that lead to this Class being in GATKDoc */ - final DocumentedGATKFeature annotation; + final DocumentedGATKFeatureObject annotation; /** When was this walker built, and what's the absolute version number */ final String buildTimestamp, absoluteVersion; @@ -60,7 +60,7 @@ class GATKDocWorkUnit implements Comparable { Map forTemplate; public GATKDocWorkUnit(String name, String filename, String group, - DocumentedGATKFeature annotation, DocumentedGATKFeatureHandler handler, + DocumentedGATKFeatureObject annotation, DocumentedGATKFeatureHandler handler, ClassDoc classDoc, Class clazz, String buildTimestamp, String absoluteVersion) { this.annotation = annotation; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 8f3ec293a..5755d2b37 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -33,6 +33,7 @@ import freemarker.template.TemplateException; import org.apache.commons.io.FileUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.broad.tribble.FeatureCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.*; @@ -50,6 +51,14 @@ public class GATKDoclet { RootDoc rootDoc; + final static Collection STATIC_DOCS = new ArrayList(); + static { + STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class, + "Reference ordered data (ROD) codecs", + "Tribble codecs for reading reference ordered data such as VCF or BED files")); + } + + /** * Extracts the contents of certain types of javadoc and adds them to an XML file. * @param rootDoc The documentation root. @@ -99,7 +108,7 @@ public class GATKDoclet { //if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance")) // logger.debug("foo"); - DocumentedGATKFeature feature = getFeatureForClassDoc(doc); + DocumentedGATKFeatureObject feature = getFeatureForClassDoc(doc); DocumentedGATKFeatureHandler handler = createHandler(doc, feature); if ( handler != null && handler.includeInDocs(doc) ) { logger.info("Generating documentation for class " + doc); @@ -146,33 +155,36 @@ public class GATKDoclet { } } - private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeature feature) { - try { - if ( feature != null ) { - if ( feature.enable() ) { - DocumentedGATKFeatureHandler handler = feature.handler().newInstance(); - handler.setDoclet(this); - return handler; - } else { - logger.info("Skipping disabled Documentation for " + doc); - } + private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeatureObject feature) { + if ( feature != null ) { + if ( feature.enable() ) { + DocumentedGATKFeatureHandler handler = new GenericDocumentationHandler(); + handler.setDoclet(this); + return handler; + } else { + logger.info("Skipping disabled Documentation for " + doc); } - } catch ( IllegalAccessException e) { - throw new RuntimeException(e); // the constructor is now private -- this is an error - } catch ( InstantiationException e) { - throw new RuntimeException(e); // the constructor is now private -- this is an error } return null; } - private DocumentedGATKFeature getFeatureForClassDoc(ClassDoc doc) { - // todo -- what do I need the ? extends Object to pass the compiler? + private DocumentedGATKFeatureObject getFeatureForClassDoc(ClassDoc doc) { Class docClass = getClassForClassDoc(doc); - if ( docClass != null && docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) { - return docClass.getAnnotation(DocumentedGATKFeature.class); - } else { + + if ( docClass == null ) return null; // not annotated so it shouldn't be documented + + if ( docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) { + DocumentedGATKFeature f = docClass.getAnnotation(DocumentedGATKFeature.class); + return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs()); + } else { + for ( DocumentedGATKFeatureObject staticDocs : STATIC_DOCS ) { + if ( staticDocs.getClassToDoc().isAssignableFrom(docClass) ) { + return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs()); + } + } + return null; } } @@ -217,16 +229,15 @@ public class GATKDoclet { Collections.sort(indexData); - Set docFeatures = new HashSet(); + List> groups = new ArrayList>(); + Set seenDocumentationFeatures = new HashSet(); List> data = new ArrayList>(); for ( GATKDocWorkUnit workUnit : indexData ) { data.add(workUnit.indexDataMap()); - docFeatures.add(workUnit.annotation); - } - - List> groups = new ArrayList>(); - for ( DocumentedGATKFeature feature : docFeatures ) { - groups.add(toMap(feature)); + if ( ! seenDocumentationFeatures.contains(workUnit.annotation.groupName()) ) { + groups.add(toMap(workUnit.annotation)); + seenDocumentationFeatures.add(workUnit.annotation.groupName()); + } } root.put("data", data); @@ -237,7 +248,7 @@ public class GATKDoclet { return root; } - private static final Map toMap(DocumentedGATKFeature annotation) { + private static final Map toMap(DocumentedGATKFeatureObject annotation) { Map root = new HashMap(); root.put("name", annotation.groupName()); root.put("summary", annotation.summary()); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 6ddf8a157..d7add9af0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -30,14 +30,18 @@ import com.sun.javadoc.FieldDoc; import com.sun.javadoc.RootDoc; import com.sun.javadoc.Tag; import org.apache.log4j.Logger; +import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import java.io.*; -import java.lang.reflect.Field; +import java.lang.reflect.*; import java.util.*; /** @@ -52,13 +56,13 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { @Override public boolean includeInDocs(ClassDoc doc) { - return true; -// try { -// Class type = HelpUtils.getClassForDoc(doc); -// return JVMUtils.isConcrete(type); -// } catch ( ClassNotFoundException e ) { -// return false; -// } +// return true; + try { + Class type = HelpUtils.getClassForDoc(doc); + return JVMUtils.isConcrete(type); + } catch ( ClassNotFoundException e ) { + return false; + } } @@ -107,13 +111,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // attempt to instantiate the class Object instance = makeInstanceIfPossible(toProcess.clazz); - Map> args = new HashMap>(); + Map>> args = new HashMap>>(); root.put("arguments", args); - args.put("all", new ArrayList()); - args.put("required", new ArrayList()); - args.put("optional", new ArrayList()); - args.put("hidden", new ArrayList()); - args.put("depreciated", new ArrayList()); + args.put("all", new ArrayList>()); + args.put("required", new ArrayList>()); + args.put("optional", new ArrayList>()); + args.put("advanced", new ArrayList>()); + args.put("hidden", new ArrayList>()); + args.put("depreciated", new ArrayList>()); try { for ( ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(classdoc)) ) { ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0); @@ -123,6 +128,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { logger.debug(String.format("Processing %s", argumentSource)); String kind = "optional"; if ( argumentSource.isRequired() ) kind = "required"; + else if ( argumentSource.isAdvanced() ) kind = "advanced"; else if ( argumentSource.isHidden() ) kind = "hidden"; else if ( argumentSource.isDeprecated() ) kind = "depreciated"; @@ -149,11 +155,37 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { logger.debug(String.format("Skipping hidden feature %s", argumentSource)); } } + + // sort the arguments + for (Map.Entry>> entry : args.entrySet()) { + entry.setValue(sortArguments(entry.getValue())); + } } catch ( ClassNotFoundException e ) { throw new RuntimeException(e); } } + private List> sortArguments(List> unsorted) { + Collections.sort(unsorted, new CompareArgumentsByName()); + return unsorted; + } + + private class CompareArgumentsByName implements Comparator> { + public int compare(Map x, Map y) { + return elt(x).compareTo(elt(y)); + } + + private String elt(Map m) { + String v = m.get("name").toString().toLowerCase(); + if ( v.startsWith("--") ) + return v.substring(2); + else if ( v.startsWith("-") ) + return v.substring(1); + else + throw new RuntimeException("Expect to see arguments beginning with at least one -, but found " + v); + } + } + private Object getFieldValue(Class c, Object instance, String fieldName) { Field field = JVMUtils.findField(c, fieldName); if ( field != null ) { @@ -255,20 +287,6 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { put("name", otherUnit.name);}}); } - - List> hierarchyDocs = new ArrayList>(); - for (final GATKDocWorkUnit other : all ) { - final String relation = classRelationship(toProcess.clazz, other.clazz); - if ( relation != null ) - hierarchyDocs.add( - new HashMap(){{ - put("filename", other.filename); - put("relation", relation); - put("name", other.name);}}); - - } - - root.put("relatedDocs", hierarchyDocs); root.put("extradocs", extraDocsData); } @@ -309,6 +327,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return fieldDoc; Field field = HelpUtils.getFieldForFieldDoc(fieldDoc); + if ( field == null ) + throw new RuntimeException("Could not find the field corresponding to " + fieldDoc + ", presumably because the field is inaccessible"); if ( field.isAnnotationPresent(ArgumentCollection.class) ) { ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName()); if ( typeDoc == null ) @@ -333,15 +353,82 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return null; } + private static final int MAX_DISPLAY_NAME = 30; + Pair displayNames(String s1, String s2) { + if ( s1 == null ) return new Pair(s2, null); + if ( s2 == null ) return new Pair(s1, null); + + String l = s1.length() > s2.length() ? s1 : s2; + String s = s1.length() > s2.length() ? s2 : s1; + + if ( l.length() > MAX_DISPLAY_NAME ) + return new Pair(s, l); + else + return new Pair(l, s); + } + + protected String argumentTypeString(Type type) { + if (type instanceof ParameterizedType) { + ParameterizedType parameterizedType = (ParameterizedType)type; + List subs = new ArrayList(); + for (Type actualType: parameterizedType.getActualTypeArguments()) + subs.add(argumentTypeString(actualType)); + return argumentTypeString(((ParameterizedType)type).getRawType()) + "[" + Utils.join(",", subs) + "]"; + } else if (type instanceof GenericArrayType) { + return argumentTypeString(((GenericArrayType)type).getGenericComponentType()) + "[]"; + } else if (type instanceof WildcardType) { + throw new RuntimeException("We don't support wildcards in arguments: " + type); + } else if (type instanceof Class) { + return ((Class) type).getSimpleName(); + } else { + throw new StingException("Unknown type: " + type); + } + } + + protected Class getFeatureTypeIfPossible(Type type) { + if ( type instanceof ParameterizedType) { + ParameterizedType paramType = (ParameterizedType)type; + if ( RodBinding.class.isAssignableFrom((Class)paramType.getRawType()) ) { + return (Class)JVMUtils.getParameterizedTypeClass(type); + } else { + for ( Type paramtype : paramType.getActualTypeArguments() ) { + Class x = getFeatureTypeIfPossible(paramtype); + if ( x != null ) + return x; + } + } + } + + return null; + } + protected Map docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) { Map root = new HashMap(); - root.put("name", def.shortName != null ? "-" + def.shortName : "--" + def.fullName ); + Pair names = displayNames("-" + def.shortName, "--" + def.fullName); - if ( def.shortName != null && def.fullName != null ) - root.put("synonyms", "--" + def.fullName); + root.put("name", names.getFirst() ); + + if ( names.getSecond() != null ) + root.put("synonyms", names.getSecond()); root.put("required", def.required ? "yes" : "no"); - root.put("type", def.argumentType.getSimpleName()); + + // type of the field + root.put("type", argumentTypeString(source.field.getGenericType())); + + Class featureClass = getFeatureTypeIfPossible(source.field.getGenericType()); + if ( featureClass != null ) { + // deal with the allowable types + FeatureManager manager = new FeatureManager(); + List rodTypes = new ArrayList(); + for (FeatureManager.FeatureDescriptor descriptor : manager.getByFeature(featureClass) ) { + rodTypes.add(String.format("%s", + GATKDocUtils.htmlFilenameForClass(descriptor.getCodecClass()), + descriptor.getName())); + } + + root.put("rodTypes", Utils.join(", ", rodTypes)); + } // summary and fulltext root.put("summary", def.doc != null ? def.doc : ""); diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index b0e25e55b..9d4b23a8b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -156,12 +156,13 @@ public class ListFileUtils { FeatureManager.FeatureDescriptor descriptor = builderForValidation.getByTriplet(triplet); if ( descriptor == null ) throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(), - String.format("Field %s had provided type %s but there's no such Tribble type. Available types are %s", - rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures())); + String.format("Field %s had provided type %s but there's no such Tribble type. The compatible types are: %n%s", + rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures(rodBinding.getType()))); if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) ) throw new UserException.BadArgumentValue(rodBinding.getName(), - String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s producing %s. Please make sure that you have provided the correct file type and/or that you are not binding your rod to a name matching one of the available types.", - rodBinding.getName(), rodBinding.getType(), descriptor.getName(), descriptor.getFeatureClass())); + String.format("Field %s expects Features of type %s, but the input file produces Features of type %s. The compatible types are: %n%s", + rodBinding.getName(), rodBinding.getType().getSimpleName(), descriptor.getSimpleFeatureName(), + builderForValidation.userFriendlyListOfAvailableFeatures(rodBinding.getType()))); rodBindings.add(triplet); diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index 90262b9c1..bae8e99ed 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -54,7 +54,8 @@ import java.util.*; public class FeatureManagerUnitTest extends BaseTest { private static final File RANDOM_FILE = new File(validationDataLocation + "exampleGATKReport.eval"); private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf"); - private static final File VCF4_FILE = new File(validationDataLocation + "vcf4.1.example.vcf"); + private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf"); + private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz"); private FeatureManager manager; private GenomeLocParser genomeLocParser; @@ -98,7 +99,8 @@ public class FeatureManagerUnitTest extends BaseTest { } public String toString() { - return String.format("FMTest name=%s codec=%s feature=%s file=%s", name, codec, feature, associatedFile); + return String.format("FMTest name=%s codec=%s feature=%s file=%s", + name, codec.getSimpleName(), feature.getSimpleName(), associatedFile); } } @@ -106,6 +108,7 @@ public class FeatureManagerUnitTest extends BaseTest { public Object[][] createTests() { new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE); new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE); + new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ); new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null); return FMTest.getTests(FMTest.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 4abf0a102..3267173a7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -38,21 +38,21 @@ public class CombineVariantsIntegrationTest extends WalkerTest { return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args; } - public void test1InOut(String file, String md5, boolean vcf3) { - test1InOut(file, md5, "", vcf3); + public void test1InOut(String file, String md5) { + test1InOut(file, md5, ""); } - public void test1InOut(String file, String md5, String args, boolean vcf3) { + public void test1InOut(String file, String md5, String args) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -priority v1 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args), + baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args), 1, Arrays.asList(md5)); executeTest("testInOut1--" + file, spec); } - public void combine2(String file1, String file2, String args, String md5, boolean vcf3) { + public void combine2(String file1, String file2, String args, String md5) { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -priority v1,v2 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -V:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args), + baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args), 1, Arrays.asList(md5)); executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); @@ -78,23 +78,23 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); } - @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); } - @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); } - @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0"); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo"); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null"); } + @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); } - @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b"); } + @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e"); } @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); } - @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format - @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); } + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a"); } - @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); } + @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } - @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); } + @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); } @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); } @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); } diff --git a/public/testdata/HiSeq.10000.vcf.gz b/public/testdata/HiSeq.10000.vcf.gz new file mode 100644 index 000000000..15e91010c Binary files /dev/null and b/public/testdata/HiSeq.10000.vcf.gz differ diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index c11200121..7fc8cd7bd 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -20,16 +20,21 @@ <#macro argumentDetails arg>

${arg.name}<#if arg.synonyms??> / ${arg.synonyms} (<#if arg.attributes??>${arg.attributes} ${arg.type}<#if arg.defaultValue??> with default value ${arg.defaultValue})

- ${arg.summary}. ${arg.fulltext}
+

+ ${arg.summary}. ${arg.fulltext} + <#if arg.rodTypes??>${arg.name} binds reference ordered data. This argument supports ROD files of the + following types: ${arg.rodTypes} <#if arg.options??> -

The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values:

+
+ The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values:
<#list arg.options as option> -
${option.name} -
${option.summary} +
${option.name}
+
${option.summary}
+

<#macro relatedByType name type> @@ -77,6 +82,7 @@ <@argumentlist name="Required" myargs=arguments.required/> <@argumentlist name="Optional" myargs=arguments.optional/> + <@argumentlist name="Advanced" myargs=arguments.advanced/> <@argumentlist name="Hidden" myargs=arguments.hidden/> <@argumentlist name="Depreciated" myargs=arguments.depreciated/> @@ -98,7 +104,7 @@ <#-- This class is related to other documented classes via sub/super relationships --> - <#if relatedDocs?size != 0> + <#if relatedDocs?? && relatedDocs?size != 0>

Related capabilities

<@relatedByType name="Superclasses" type="superclass"/> diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css index 2abcf0397..375df2f51 100644 --- a/settings/helpTemplates/style.css +++ b/settings/helpTemplates/style.css @@ -42,6 +42,10 @@ p.version text-align: center; } +p.args +{ + margin-left: 3em; +} h1, h2, h3, h4 { @@ -81,17 +85,17 @@ hr * enum DT layout */ -dl { - border: 1px solid #ccc; +dl.enum { + margin-left: 3em; + border: 1px dashed #ccc; } -dt { +dt.enum { font-weight: bold; text-decoration: underline; } -dd { - margin: 0; +dd.enum { padding: 0 0 0.5em 0; }