diff --git a/public/java/src/org/broadinstitute/sting/commandline/Advanced.java b/public/java/src/org/broadinstitute/sting/commandline/Advanced.java
new file mode 100644
index 000000000..7aeefe261
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/commandline/Advanced.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.commandline;
+
+import java.lang.annotation.*;
+
+/**
+ * Indicates that a walker argument is considered an advanced option.
+ *
+ * @author Mark DePristo
+ * @version 0.1
+ */
+@Documented
+@Inherited
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.TYPE,ElementType.FIELD})
+public @interface Advanced {
+}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
index e0e2ac378..8ec0d650a 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java
@@ -151,6 +151,14 @@ public class ArgumentSource {
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
}
+ /**
+ * Reports whether this argument is an advanced option, i.e. whether its underlying field is annotated with {@link Advanced}.
+ * @return True if so. False otherwise.
+ */
+ public boolean isAdvanced() {
+ return field.isAnnotationPresent(Advanced.class);
+ }
+
/**
* Is this command-line argument dependent on some primitive argument types?
* @return True if this command-line argument depends on other arguments; false otherwise.
diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index 02af884a2..b12ae8e75 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -325,7 +325,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
- Class parameterType = getParameterizedTypeClass(type);
+ Class parameterType = JVMUtils.getParameterizedTypeClass(type);
return RodBinding.makeUnbound((Class extends Feature>)parameterType);
}
@@ -338,6 +338,8 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
+ Class extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
+
try {
String name = defaultDefinition.fullName;
String tribbleType = null;
@@ -373,18 +375,18 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if ( featureDescriptor != null ) {
tribbleType = featureDescriptor.getName();
logger.warn("Dynamically determined type of " + file + " to be " + tribbleType);
- } else {
- throw new UserException.CommandLineException(
- String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
- "Please add an explicit type tag :TYPE listing the correct type from among the supported types: %s",
- manager.userFriendlyListOfAvailableFeatures()));
}
}
+
+ if ( tribbleType == null )
+ throw new UserException.CommandLineException(
+ String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
+ "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
+ manager.userFriendlyListOfAvailableFeatures(parameterType)));
}
}
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
- Class parameterType = getParameterizedTypeClass(type);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(result,tags);
parsingEngine.addRodBinding(result);
@@ -399,16 +401,6 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
value, source.field.getName(), e.getMessage()));
}
}
-
- private Class getParameterizedTypeClass(Type t) {
- if ( t instanceof ParameterizedType ) {
- ParameterizedType parameterizedType = (ParameterizedType)t;
- if ( parameterizedType.getActualTypeArguments().length != 1 )
- throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t);
- return (Class)parameterizedType.getActualTypeArguments()[0];
- } else
- throw new ReviewedStingException("BUG: could not find generic type on class " + t);
- }
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
index 8a13dadbf..b8488dc9a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
+import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -49,7 +50,7 @@ import java.util.*;
@DocumentedGATKFeature(
groupName = "GATK Engine",
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
- extraDocs = { ReadFilter.class, UserException.class })
+ extraDocs = { UserException.class })
public class CommandLineGATK extends CommandLineExecutable {
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
private String analysisName = null;
@@ -174,6 +175,10 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
+ formatter.format("Available Reference Ordered Data types:%n");
+ formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
+ formatter.format("%n");
+
formatter.format("For a full description of this walker, see its GATKdocs at:%n");
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java
index cd77a9e7e..4ec451567 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckFilter.java
@@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out FailsVendorQualityCheck reads.
*/
-public class FailsVendorQualityCheckReadFilter extends ReadFilter {
+public class FailsVendorQualityCheckFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getReadFailsVendorQualityCheckFlag();
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java
similarity index 96%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java
index 75369b306..ed9c37dca 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityFilter.java
@@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
* @version 0.1
*/
-public class MappingQualityReadFilter extends ReadFilter {
+public class MappingQualityFilter extends ReadFilter {
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public int MIN_MAPPING_QUALTY_SCORE = 10;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java
index 1afec36d1..ccdb40d31 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityUnavailableFilter.java
@@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
* @version 0.1
*/
-public class MappingQualityUnavailableReadFilter extends ReadFilter {
+public class MappingQualityUnavailableFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java
index e49d4117c..57db8419c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityZeroFilter.java
@@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
* @version 0.1
*/
-public class MappingQualityZeroReadFilter extends ReadFilter {
+public class MappingQualityZeroFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java
similarity index 95%
rename from public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java
rename to public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java
index 31c2144ce..50cd30f71 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/NotPrimaryAlignmentFilter.java
@@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
- * Filter out duplicate reads.
+ * Filter out reads that are not primary alignments.
*/
-public class NotPrimaryAlignmentReadFilter extends ReadFilter {
+public class NotPrimaryAlignmentFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getNotPrimaryAlignmentFlag();
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
index 26a400071..c99aea254 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java
@@ -36,7 +36,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.help.GATKDocUtils;
+import org.broadinstitute.sting.utils.help.HelpUtils;
+import javax.mail.Header;
import java.io.File;
import java.util.*;
@@ -50,7 +53,7 @@ import java.util.*;
* @author depristo
*/
public class FeatureManager {
- public static class FeatureDescriptor {
+ public static class FeatureDescriptor implements Comparable {
final String name;
final FeatureCodec codec;
@@ -62,6 +65,7 @@ public class FeatureManager {
public String getName() {
return name;
}
+ public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); }
public FeatureCodec getCodec() {
return codec;
}
@@ -70,13 +74,18 @@ public class FeatureManager {
@Override
public String toString() {
- return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName());
+ return String.format("FeatureDescriptor name=%s codec=%s feature=%s",
+ getName(), getCodecClass().getName(), getFeatureClass().getName());
+ }
+ /** Orders descriptors by name, using the natural String ordering of {@link #getName()}. */
+ @Override
+ public int compareTo(FeatureDescriptor o) {
+ return getName().compareTo(o.getName());
}
}
private final PluginManager pluginManager;
- private final Collection featureDescriptors = new HashSet();
-
+ private final Collection featureDescriptors = new TreeSet();
/**
* Construct a FeatureManager
@@ -114,7 +123,7 @@ public class FeatureManager {
*/
@Requires("featureClass != null")
public Collection getByFeature(Class featureClass) {
- Set consistentDescriptors = new HashSet();
+ Set consistentDescriptors = new TreeSet();
if (featureClass == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
@@ -189,10 +198,40 @@ public class FeatureManager {
*/
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures() {
- List names = new ArrayList();
- for ( final FeatureDescriptor descriptor : featureDescriptors )
- names.add(descriptor.getName());
- return Utils.join(",", names);
+ return userFriendlyListOfAvailableFeatures(Feature.class);
+ }
+
+ /**
+ * Returns a table of the available tribble track names (vcf,dbsnp,etc) that we can load,
+ * restricted to only Codecs producing Features consistent with the requiredFeatureType
+ * @param requiredFeatureType only descriptors whose feature class is assignable to this type are listed
+ * @return a header row followed by one row per matching descriptor (name, simple feature class name, documentation link for the codec class), each ending in a line separator
+ */
+ @Ensures("result != null")
+ public String userFriendlyListOfAvailableFeatures(Class<? extends Feature> requiredFeatureType) {
+ final String nameHeader="Name", featureHeader = "FeatureType", docHeader="Documentation";
+ // First pass: size the name and feature-type columns to the widest matching entry (never narrower than the header).
+ int maxNameLen = nameHeader.length(), maxFeatureNameLen = featureHeader.length();
+ for ( final FeatureDescriptor descriptor : featureDescriptors ) {
+ if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
+ maxNameLen = Math.max(maxNameLen, descriptor.getName().length());
+ maxFeatureNameLen = Math.max(maxFeatureNameLen, descriptor.getSimpleFeatureName().length());
+ }
+ }
+ // Second pass: emit the header, then one right-justified row per descriptor that passes the same filter.
+ StringBuilder docs = new StringBuilder();
+ String format = "%" + maxNameLen + "s %" + maxFeatureNameLen + "s %s%n";
+ docs.append(String.format(format, nameHeader, featureHeader, docHeader));
+ for ( final FeatureDescriptor descriptor : featureDescriptors ) {
+ if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
+ String oneDoc = String.format(format,
+ descriptor.getName(),
+ descriptor.getSimpleFeatureName(),
+ GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass()));
+ docs.append(oneDoc);
+ }
+ }
+ return docs.toString();
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java
index 4bfedb672..e2db1dc52 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DuplicateWalker.java
@@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
+import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
@@ -17,7 +17,7 @@ import java.util.Set;
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS,DataSource.REFERENCE})
-@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class})
+@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker extends Walker {
// Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set> readSets ) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
index b0b2687f4..8152f74c2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
@@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
-import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter;
-import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
+import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
+import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL)
-@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class})
+@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker extends Walker {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
index 83a8ce7d7..70f3c6a1a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@@ -44,7 +44,9 @@ import java.util.Set;
public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
+ /** The default model with the best performance in all cases */
EXACT,
+ /** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */
GRID_SEARCH
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index 594c1dd28..60dfe4fe7 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -53,7 +53,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
}
public enum GENOTYPING_MODE {
+ /** the default; the Unified Genotyper will choose the most likely alternate allele */
DISCOVERY,
+ /** only the alleles passed in from a VCF rod bound to the -alleles argument will be used for genotyping */
GENOTYPE_GIVEN_ALLELES
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 1a76bfd07..e7f89bf08 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -36,31 +36,54 @@ import java.io.File;
public class UnifiedArgumentCollection {
- // control the various models to be used
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
+ /**
+ * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
+ */
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
+ /**
+ * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
+ * het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
+ */
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
+ /**
+ * Specifies how to determine the alternate allele to use for genotyping: DISCOVERY (the default) or GENOTYPE_GIVEN_ALLELES.
+ */
- @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
+ @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate allele to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
+ /**
+ * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
+ * confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
+ * is the default). Note that the confidence (QUAL) values for multi-sample low-pass (e.g. 4x per sample) calling might
+ * be significantly smaller with the new EXACT model than with our older GRID_SEARCH model, as the latter tended to
+ * over-estimate the confidence; for low-pass calling we tend to use much smaller thresholds (e.g. 4).
+ */
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
+ /**
+ * the minimum phred-scaled Qscore threshold to emit low confidence calls. Genotypes with confidence >= this but less
+ * than the calling threshold are emitted but marked as filtered.
+ */
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
+ /**
+ * This argument is not enabled by default because it increases the runtime by an appreciable amount.
+ */
@Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
public boolean COMPUTE_SLOD = false;
@@ -80,7 +103,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "abort_at_too_much_coverage", doc = "Don't call a site if the downsampled coverage is greater than this value", required = false)
public int COVERAGE_AT_WHICH_TO_ABORT = -1;
-
// control the various parameters to be used
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public int MIN_BASE_QUALTY_SCORE = 17;
@@ -91,11 +113,17 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
-
// indel-related arguments
+ /**
+ * A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site.
+ * Decreasing this value will increase sensitivity but at the cost of larger calling time and a larger number of false positives.
+ */
@Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
+ /**
+ * This argument informs the prior probability of having an indel at a site.
+ */
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
@@ -126,22 +154,23 @@ public class UnifiedArgumentCollection {
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
+
@Hidden
@Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false)
public boolean dovit = false;
+
@Hidden
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
public boolean GSA_PRODUCTION_ONLY = false;
+
@Hidden
-
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden
- @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
+ @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
-
@Deprecated
@Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
private Boolean ALL_BASES_DEPRECATED = false;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index d31bb6fb9..d5dbdedd6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
-import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
@@ -45,13 +45,73 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.*;
-
/**
- * A variant caller which unifies the approaches of several disparate callers. Works for single-sample and
- * multi-sample data. The user can choose from several different incorporated calculation models.
+ * A variant caller which unifies the approaches of several disparate callers -- Works for single-sample and multi-sample data.
+ *
+ *
+ * The GATK Unified Genotyper is a multiple-sample, technology-aware SNP and indel caller. It uses a Bayesian genotype
+ * likelihood model to estimate simultaneously the most likely genotypes and allele frequency in a population of N samples,
+ * emitting an accurate posterior probability of there being a segregating variant allele at each locus as well as for the
+ * genotype of each sample. The system can either emit just the variant sites or complete genotypes (which includes
+ * homozygous reference calls) satisfying some phred-scaled confidence value. The genotyper can make accurate calls on
+ * both single sample data and multi-sample data.
+ *
+ * <h2>Input</h2>
+ *
+ * The read data from which to make variant calls.
+ *
+ *
+ * <h2>Output</h2>
+ *
+ * A raw, unfiltered, highly specific callset in VCF format.
+ *
+ *
+ * <h2>Example generic command for multi-sample SNP calling</h2>
+ * The above command will call all of the samples in your provided BAM files [-I arguments] together and produce a VCF file
+ * with sites and genotypes for all samples. The easiest way to get the dbSNP file is from the GATK resource bundle. Several
+ * arguments have parameters that should be chosen based on the average coverage per sample in your data. See the detailed
+ * argument descriptions below.
+ * The system is under active and continuous development. All outputs, the underlying likelihood model, arguments, and
+ * file formats are likely to change.
+ * The system can be very aggressive in calling variants. In the 1000 genomes project for pilot 2 (deep coverage of ~35x)
+ * we expect the raw Qscore > 50 variants to contain at least ~10% FP calls. We use extensive post-calling filters to eliminate
+ * most of these FPs. Variant Quality Score Recalibration is a tool to perform this filtering.
+ * We only handle diploid genotypes
+ *
+ *
*/
+
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
-@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
+@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@@ -61,10 +121,9 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; }
@@ -72,7 +131,9 @@ public class UnifiedGenotyper extends LocusWalker> getCompRodBindings() { return Collections.emptyList(); }
public List> getResourceRodBindings() { return Collections.emptyList(); }
- // control the output
+ /**
+ * A raw, unfiltered, highly specific callset in VCF format.
+ */
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null;
@@ -82,9 +143,15 @@ public class UnifiedGenotyper extends LocusWalker annotationsToUse = new ArrayList();
+ /**
+ * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
+ */
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { "Standard" };
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index b3f77fc06..06455df6d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -51,8 +51,11 @@ public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
public enum OUTPUT_MODE {
+ /** the default */
EMIT_VARIANTS_ONLY,
+ /** include confident reference sites */
EMIT_ALL_CONFIDENT_SITES,
+ /** any callable site regardless of confidence */
EMIT_ALL_SITES
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
index 129be7f55..8680f3537 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@@ -178,6 +178,7 @@ public class IndelRealigner extends ReadWalker {
* will only proceed with the realignment (even above the given threshold) if it minimizes entropy among the reads (and doesn't simply
* push the mismatch column to another position). This parameter is just a heuristic and should be adjusted based on your particular data set.
*/
+ @Advanced
@Argument(fullName="entropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
protected double MISMATCH_THRESHOLD = 0.15;
@@ -185,30 +186,35 @@ public class IndelRealigner extends ReadWalker {
* For expert users only! To minimize memory consumption you can lower this number (but then the tool may skip realignment on regions with too much coverage;
* and if the number is too low, it may generate errors during realignment). Just make sure to give Java enough memory! 4Gb should be enough with the default value.
*/
+ @Advanced
@Argument(fullName="maxReadsInMemory", shortName="maxInMemory", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter", required=false)
protected int MAX_RECORDS_IN_MEMORY = 150000;
/**
* For expert users only!
*/
+ @Advanced
@Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false)
protected int MAX_ISIZE_FOR_MOVEMENT = 3000;
/**
* For expert users only!
*/
+ @Advanced
@Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="maximum positional move in basepairs that a read can be adjusted during realignment", required=false)
protected int MAX_POS_MOVE_ALLOWED = 200;
/**
* For expert users only! If you need to find the optimal solution regardless of running time, use a higher number.
*/
+ @Advanced
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
protected int MAX_CONSENSUSES = 30;
/**
* For expert users only! If you need to find the optimal solution regardless of running time, use a higher number.
*/
+ @Advanced
@Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false)
protected int MAX_READS_FOR_CONSENSUSES = 120;
@@ -216,9 +222,11 @@ public class IndelRealigner extends ReadWalker {
* For expert users only! If this value is exceeded at a given interval, realignment is not attempted and the reads are passed to the output file(s) as-is.
* If you need to allow more reads (e.g. with very deep coverage) regardless of memory, use a higher number.
*/
+ @Advanced
@Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment", required=false)
protected int MAX_READS = 20000;
+ @Advanced
@Argument(fullName="noOriginalAlignmentTags", shortName="noTags", required=false, doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam")
protected boolean NO_ORIGINAL_ALIGNMENT_TAGS = false;
@@ -226,6 +234,7 @@ public class IndelRealigner extends ReadWalker {
* For expert users only! This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception.
* Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory.
*/
+ @Advanced
@Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false, doc="The target intervals are not sorted")
protected boolean TARGET_NOT_SORTED = false;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
index 49916e1a2..bede50a0b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
@@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
-import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@@ -98,7 +98,7 @@ import java.util.List;
*
* @author ebanks
*/
-@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class})
+@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, BadCigarFilter.class})
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
index 9f6ac2a91..546bbe1a6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
@@ -33,7 +33,7 @@ import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
-import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
@@ -78,7 +78,7 @@ import java.util.*;
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
*/
-@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
+@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker {
// @Output
// PrintStream out;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
index 34c7912d9..17a6e20f1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
-import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
@@ -91,7 +91,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@By(DataSource.READS)
// Filter out all reads with zero mapping quality
-@ReadFilters({MappingQualityZeroReadFilter.class})
+@ReadFilters({MappingQualityZeroFilter.class})
public class ReadBackedPhasingWalker extends RodWalker {
private static final boolean DEBUG = false;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index 5ffc61fe3..98c8950e3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -29,8 +29,8 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
-import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
+import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
@@ -68,6 +68,8 @@ import java.util.Map;
*
*
Input
*
+ * The input read data whose base quality scores need to be assessed.
+ *
* A database of known polymorphic sites to skip over.
*
*
@@ -95,7 +97,7 @@ import java.util.Map;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
-@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
+@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
@PartitionBy(PartitionType.LOCUS)
public class CountCovariatesWalker extends LocusWalker implements TreeReducible {
@@ -134,6 +136,10 @@ public class CountCovariatesWalker extends LocusWalkerInput
*
+ * The input read data whose base quality scores need to be recalibrated.
+ *
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
*
*
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index d81a57aad..517c2362a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -157,6 +157,10 @@ public class VariantRecalibrator extends RodWalker alleles = new HashSet();
- alleles.add(ref);
- if ( alt != null )
- alleles.add(alt);
-
- HashMap attrs = new HashMap();
- String id = array[array.length - 1];
- if ( id.indexOf("dbsnp") != -1 ) {
- attrs.put(VariantContext.ID_KEY, parseID(id));
- }
-
- // create a new feature given the array
- return new VariantContext("CGI", array[3], start, end, alleles, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
- }
-
- public Class getFeatureType() {
- return VariantContext.class;
- }
-
- // There's no spec and no character to distinguish header lines...
- private final static int NUM_HEADER_LINES = 12;
- public Object readHeader(LineReader reader) {
- return null;
-
- //String headerLine = null;
- //try {
- // for (int i = 0; i < NUM_HEADER_LINES; i++)
- // headerLine = reader.readLine();
- //} catch (IOException e) {
- // throw new IllegalArgumentException("Unable to read a line from the line reader");
- //}
- //return headerLine;
- }
-
- private static final Pattern DBSNP_PATTERN = Pattern.compile("^dbsnp\\.\\d+:(.*)");
- private String parseID(String raw) {
- StringBuilder sb = null;
-
- String[] ids = raw.split(";");
- for ( String id : ids ) {
- Matcher matcher = DBSNP_PATTERN.matcher(id);
- if ( matcher.matches() ) {
- String rsID = matcher.group(1);
- if ( sb == null ) {
- sb = new StringBuilder(rsID);
- } else {
- sb.append(";");
- sb.append(rsID);
- }
- }
- }
-
- return sb == null ? null : sb.toString();
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java
deleted file mode 100755
index 284c43e90..000000000
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java
+++ /dev/null
@@ -1,209 +0,0 @@
-package org.broadinstitute.sting.utils.codecs.soapsnp;
-
-import org.broad.tribble.Feature;
-import org.broad.tribble.FeatureCodec;
-import org.broad.tribble.NameAwareCodec;
-import org.broad.tribble.TribbleException;
-import org.broad.tribble.exception.CodecLineParsingException;
-import org.broad.tribble.readers.LineReader;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.Genotype;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-
-import java.util.*;
-
-/**
- * @author depristo
- *
- * a codec for parsing soapsnp files (see http://soap.genomics.org.cn/soapsnp.html#usage2)
- *
- *
- * A simple text file format with the following whitespace separated fields:
- *
-1) Chromosome ID
-2) Coordinate on chromosome, start from 1
-3) Reference genotype
-4) Consensus genotype
-5) Quality score of consensus genotype
-6) Best base
-7) Average quality score of best base
-8) Count of uniquely mapped best base
-9) Count of all mapped best base
-10) Second best bases
-11) Average quality score of second best base
-12) Count of uniquely mapped second best base
-13) Count of all mapped second best base
-14) Sequencing depth of the site
-15) Rank sum test p_value
-16) Average copy number of nearby region
-17) Whether the site is a dbSNP.
- */
-public class SoapSNPCodec implements FeatureCodec, NameAwareCodec {
- private String[] parts;
-
- // we store a name to give to each of the variant contexts we emit
- private String name = "Unknown";
-
- public Feature decodeLoc(String line) {
- return decode(line);
- }
-
- /**
- * Decode a line as a Feature.
- *
- * @param line
- *
- * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is
- * a comment)
- */
- public Feature decode(String line) {
- try {
- // parse into lines
- parts = line.trim().split("\\s+");
-
- // check that we got the correct number of tokens in the split
- if (parts.length != 18)
- throw new CodecLineParsingException("Invalid SoapSNP row found -- incorrect element count. Expected 18, got " + parts.length + " line = " + line);
-
- String contig = parts[0];
- long start = Long.valueOf(parts[1]);
- AlleleAndGenotype allelesAndGenotype = parseAlleles(parts[2], parts[3], line);
-
- double negLog10PError = Integer.valueOf(parts[4]) / 10.0;
-
- Map attributes = new HashMap();
- attributes.put("BestBaseQ", parts[6]);
- attributes.put("SecondBestBaseQ", parts[10]);
- attributes.put("RankSumP", parts[15]);
- // add info to keys
-
- //System.out.printf("Alleles = " + allelesAndGenotype.alleles);
- //System.out.printf("genotype = " + allelesAndGenotype.genotype);
-
- VariantContext vc = new VariantContext(name, contig, start, start, allelesAndGenotype.alleles, allelesAndGenotype.genotype, negLog10PError, VariantContext.PASSES_FILTERS, attributes);
-
- //System.out.printf("line = %s%n", line);
- //System.out.printf("vc = %s%n", vc);
-
- return vc;
- } catch (CodecLineParsingException e) {
- throw new TribbleException("Unable to parse line " + line,e);
- } catch (NumberFormatException e) {
- e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
- throw new TribbleException("Unable to parse line " + line,e);
- }
- }
-
- private static class AlleleAndGenotype {
- Collection alleles;
- Collection genotype;
-
- public AlleleAndGenotype(Collection alleles, Genotype genotype) {
- this.alleles = alleles;
- this.genotype = new HashSet();
- this.genotype.add(genotype);
- }
- }
-
- private AlleleAndGenotype parseAlleles(String ref, String consensusGenotype, String line) {
- /* A Adenine
- C Cytosine
- G Guanine
- T (or U) Thymine (or Uracil)
- R A or G
- Y C or T
- S G or C
- W A or T
- K G or T
- M A or C
- B C or G or T
- D A or G or T
- H A or C or T
- V A or C or G
- N any base
- . or - gap
- */
- if ( ref.equals(consensusGenotype) )
- throw new TribbleException.InternalCodecException("Ref base and consensus genotype are the same " + ref);
-
- Allele refAllele = Allele.create(ref, true);
- List genotypeAlleles = null;
-
- char base = consensusGenotype.charAt(0);
-
- switch ( base ) {
- case 'A': case 'C': case 'G': case 'T':
- Allele a = Allele.create(consensusGenotype);
- genotypeAlleles = Arrays.asList(a, a);
- break;
- case 'R': case 'Y': case 'S': case 'W': case 'K': case 'M':
- genotypeAlleles = determineAlt(refAllele, ref.charAt(0), base);
- break;
- default:
- throw new TribbleException("Unexpected consensus genotype " + consensusGenotype + " at line = " + line);
- }
-
-
- Collection alleles = new HashSet(genotypeAlleles);
- alleles.add(refAllele);
- Genotype genotype = new Genotype("unknown", genotypeAlleles); // todo -- probably should include genotype quality
-
- return new AlleleAndGenotype( alleles, genotype );
- }
-
- private static final Map IUPAC_SNPS = new HashMap();
- static {
- IUPAC_SNPS.put('R', "AG");
- IUPAC_SNPS.put('Y', "CT");
- IUPAC_SNPS.put('S', "GC");
- IUPAC_SNPS.put('W', "AT");
- IUPAC_SNPS.put('K', "GT");
- IUPAC_SNPS.put('M', "AC");
- }
-
- private List determineAlt(Allele ref, char refbase, char alt) {
- String alts = IUPAC_SNPS.get(alt);
- if ( alts == null )
- throw new IllegalStateException("BUG: unexpected consensus genotype " + alt);
-
- Allele a1 = alts.charAt(0) == refbase ? ref : Allele.create((byte)alts.charAt(0));
- Allele a2 = alts.charAt(1) == refbase ? ref : Allele.create((byte)alts.charAt(1));
-
- //if ( a1 != ref && a2 != ref )
- // throw new IllegalStateException("BUG: unexpected consensus genotype " + alt + " does not contain the reference base " + ref);
-
- return Arrays.asList(a1, a2);
- }
-
- /**
- * @return VariantContext
- */
- public Class getFeatureType() {
- return VariantContext.class;
- }
-
- public Object readHeader(LineReader reader) {
-
- return null; // we don't have a meaningful header
- }
-
- /**
- * get the name of this codec
- * @return our set name
- */
- public String getName() {
- return name;
- }
-
- /**
- * set the name of this codec
- * @param name new name
- */
- public void setName(String name) {
- this.name = name;
- }
-
- public static void main(String[] args) {
- System.out.printf("Testing " + args[0]);
- }
-}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java
index 710503ca8..89163dfcb 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java
@@ -39,6 +39,5 @@ public @interface DocumentedGATKFeature {
public boolean enable() default true;
public String groupName();
public String summary() default "";
- public Class extends DocumentedGATKFeatureHandler> handler() default GenericDocumentationHandler.class;
public Class[] extraDocs() default {};
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java
new file mode 100644
index 000000000..66354202f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.help;
+
+/**
+ * Documentation unit. Effectively a class version of the DocumentedGATKFeature annotation.
+ *
+ * @author depristo
+ */
+class DocumentedGATKFeatureObject {
+ private final Class classToDoc;
+ private final boolean enable;
+ private final String groupName, summary;
+ private final Class[] extraDocs;
+
+ public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs) {
+ this.classToDoc = classToDoc;
+ this.enable = enable;
+ this.groupName = groupName;
+ this.summary = summary;
+ this.extraDocs = extraDocs;
+ }
+
+ public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary) {
+ this(classToDoc, true, groupName, summary, new Class[]{});
+ }
+
+ public Class getClassToDoc() { return classToDoc; }
+ public boolean enable() { return enable; }
+ public String groupName() { return groupName; }
+ public String summary() { return summary; }
+ public Class[] extraDocs() { return extraDocs; }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
index e2909cf15..983805c4d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java
@@ -36,9 +36,9 @@ public class GATKDocUtils {
public static String helpLinksToGATKDocs(Class c) {
String classPath = htmlFilenameForClass(c);
StringBuilder b = new StringBuilder();
- b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n");
- b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n");
- b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n");
+ b.append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath);
+ //b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n");
+ //b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n");
return b.toString();
}
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java
index 1f6db2757..41c855329 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocWorkUnit.java
@@ -51,7 +51,7 @@ class GATKDocWorkUnit implements Comparable {
/** The javadoc documentation for clazz */
final ClassDoc classDoc;
/** The annotation that lead to this Class being in GATKDoc */
- final DocumentedGATKFeature annotation;
+ final DocumentedGATKFeatureObject annotation;
/** When was this walker built, and what's the absolute version number */
final String buildTimestamp, absoluteVersion;
@@ -60,7 +60,7 @@ class GATKDocWorkUnit implements Comparable {
Map forTemplate;
public GATKDocWorkUnit(String name, String filename, String group,
- DocumentedGATKFeature annotation, DocumentedGATKFeatureHandler handler,
+ DocumentedGATKFeatureObject annotation, DocumentedGATKFeatureHandler handler,
ClassDoc classDoc, Class clazz,
String buildTimestamp, String absoluteVersion) {
this.annotation = annotation;
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
index 8f3ec293a..5755d2b37 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java
@@ -33,6 +33,7 @@ import freemarker.template.TemplateException;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
+import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.*;
@@ -50,6 +51,14 @@ public class GATKDoclet {
RootDoc rootDoc;
+ final static Collection STATIC_DOCS = new ArrayList();
+ static {
+ STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class,
+ "Reference ordered data (ROD) codecs",
+ "Tribble codecs for reading reference ordered data such as VCF or BED files"));
+ }
+
+
/**
* Extracts the contents of certain types of javadoc and adds them to an XML file.
* @param rootDoc The documentation root.
@@ -99,7 +108,7 @@ public class GATKDoclet {
//if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance"))
// logger.debug("foo");
- DocumentedGATKFeature feature = getFeatureForClassDoc(doc);
+ DocumentedGATKFeatureObject feature = getFeatureForClassDoc(doc);
DocumentedGATKFeatureHandler handler = createHandler(doc, feature);
if ( handler != null && handler.includeInDocs(doc) ) {
logger.info("Generating documentation for class " + doc);
@@ -146,33 +155,36 @@ public class GATKDoclet {
}
}
- private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeature feature) {
- try {
- if ( feature != null ) {
- if ( feature.enable() ) {
- DocumentedGATKFeatureHandler handler = feature.handler().newInstance();
- handler.setDoclet(this);
- return handler;
- } else {
- logger.info("Skipping disabled Documentation for " + doc);
- }
+ private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeatureObject feature) {
+ if ( feature != null ) {
+ if ( feature.enable() ) {
+ DocumentedGATKFeatureHandler handler = new GenericDocumentationHandler();
+ handler.setDoclet(this);
+ return handler;
+ } else {
+ logger.info("Skipping disabled Documentation for " + doc);
}
- } catch ( IllegalAccessException e) {
- throw new RuntimeException(e); // the constructor is now private -- this is an error
- } catch ( InstantiationException e) {
- throw new RuntimeException(e); // the constructor is now private -- this is an error
}
return null;
}
- private DocumentedGATKFeature getFeatureForClassDoc(ClassDoc doc) {
- // todo -- what do I need the ? extends Object to pass the compiler?
+ private DocumentedGATKFeatureObject getFeatureForClassDoc(ClassDoc doc) {
Class extends Object> docClass = getClassForClassDoc(doc);
- if ( docClass != null && docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) {
- return docClass.getAnnotation(DocumentedGATKFeature.class);
- } else {
+
+ if ( docClass == null )
+ return null; // no class could be resolved for this doc, so there is nothing to document
+
+ if ( docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) {
+ DocumentedGATKFeature f = docClass.getAnnotation(DocumentedGATKFeature.class);
+ return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs());
+ } else {
+ for ( DocumentedGATKFeatureObject staticDocs : STATIC_DOCS ) {
+ if ( staticDocs.getClassToDoc().isAssignableFrom(docClass) ) {
+ return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs());
+ }
+ }
+ return null;
}
}
@@ -217,16 +229,15 @@ public class GATKDoclet {
Collections.sort(indexData);
- Set docFeatures = new HashSet();
+ List
#macro>
<#macro relatedByType name type>
@@ -77,6 +82,7 @@
<@argumentlist name="Required" myargs=arguments.required/>
<@argumentlist name="Optional" myargs=arguments.optional/>
+ <@argumentlist name="Advanced" myargs=arguments.advanced/>
<@argumentlist name="Hidden" myargs=arguments.hidden/>
<@argumentlist name="Depreciated" myargs=arguments.depreciated/>
@@ -98,7 +104,7 @@
#if>
<#-- This class is related to other documented classes via sub/super relationships -->
- <#if relatedDocs?size != 0>
+ <#if relatedDocs?? && relatedDocs?size != 0>