Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-08-19 09:32:20 -04:00
commit 269ed1206c
39 changed files with 545 additions and 529 deletions

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.lang.annotation.*;
/**
 * Indicates that a walker argument is considered an advanced option
 * when displayed by the command-line argument system.
 *
 * @author Mark DePristo
 * @version 0.1
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE,ElementType.FIELD})
public @interface Advanced {
}

View File

@ -151,6 +151,14 @@ public class ArgumentSource {
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
}
/**
 * Determines whether this argument is flagged as an advanced option for
 * display purposes in the command-line argument system.
 * @return True if the underlying field carries the {@code Advanced} annotation; false otherwise.
 */
public boolean isAdvanced() {
    final boolean markedAdvanced = field.isAnnotationPresent(Advanced.class);
    return markedAdvanced;
}
/**
* Is this command-line argument dependent on some primitive argument types?
* @return True if this command-line argument depends on other arguments; false otherwise.

View File

@ -325,7 +325,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
Class parameterType = getParameterizedTypeClass(type);
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
}
@ -338,6 +338,8 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
try {
String name = defaultDefinition.fullName;
String tribbleType = null;
@ -373,18 +375,18 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if ( featureDescriptor != null ) {
tribbleType = featureDescriptor.getName();
logger.warn("Dynamically determined type of " + file + " to be " + tribbleType);
} else {
throw new UserException.CommandLineException(
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
"Please add an explicit type tag :TYPE listing the correct type from among the supported types: %s",
manager.userFriendlyListOfAvailableFeatures()));
}
}
if ( tribbleType == null )
throw new UserException.CommandLineException(
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType)));
}
}
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
Class parameterType = getParameterizedTypeClass(type);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(result,tags);
parsingEngine.addRodBinding(result);
@ -399,16 +401,6 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
value, source.field.getName(), e.getMessage()));
}
}
/**
 * Extracts the single actual type argument from a parameterized type,
 * e.g. yields the Class for T given a RodBinding&lt;T&gt; Type.
 *
 * @param t the type to inspect; expected to be a ParameterizedType with exactly one type argument
 * @return the lone generic type argument of t, as a raw Class
 * @throws ReviewedStingException if t is not parameterized or has more than one type argument
 */
private Class getParameterizedTypeClass(Type t) {
    // guard: a non-parameterized type has no generic argument to extract
    if ( !(t instanceof ParameterizedType) )
        throw new ReviewedStingException("BUG: could not find generic type on class " + t);
    final Type[] typeArguments = ((ParameterizedType)t).getActualTypeArguments();
    if ( typeArguments.length != 1 )
        throw new ReviewedStingException("BUG: more than 1 generic type found on class" + t);
    return (Class)typeArguments[0];
}
}
/**

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -49,7 +50,7 @@ import java.util.*;
@DocumentedGATKFeature(
groupName = "GATK Engine",
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
extraDocs = { ReadFilter.class, UserException.class })
extraDocs = { UserException.class })
public class CommandLineGATK extends CommandLineExecutable {
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
private String analysisName = null;
@ -174,6 +175,10 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
formatter.format("Available Reference Ordered Data types:%n");
formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
formatter.format("%n");
formatter.format("For a full description of this walker, see its GATKdocs at:%n");
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));

View File

@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out FailsVendorQualityCheck reads.
*/
public class FailsVendorQualityCheckReadFilter extends ReadFilter {
public class FailsVendorQualityCheckFilter extends ReadFilter {
/**
 * Filters out reads flagged as failing the vendor/platform quality check.
 * @param read the SAM record to test
 * @return true if the read's fails-vendor-quality-check flag is set, i.e. the read should be removed
 */
public boolean filterOut( final SAMRecord read ) {
return read.getReadFailsVendorQualityCheckFlag();
}

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
* @version 0.1
*/
public class MappingQualityReadFilter extends ReadFilter {
public class MappingQualityFilter extends ReadFilter {
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public int MIN_MAPPING_QUALTY_SCORE = 10;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
* @version 0.1
*/
public class MappingQualityUnavailableReadFilter extends ReadFilter {
public class MappingQualityUnavailableFilter extends ReadFilter {
/**
 * Filters out reads whose mapping quality is the sentinel "unavailable" value.
 * @param rec the SAM record to test
 * @return true if the record's mapping quality equals QualityUtils.MAPPING_QUALITY_UNAVAILABLE
 */
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
}

View File

@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
* @version 0.1
*/
public class MappingQualityZeroReadFilter extends ReadFilter {
public class MappingQualityZeroFilter extends ReadFilter {
/**
 * Filters out reads with a mapping quality of exactly zero.
 * @param rec the SAM record to test
 * @return true when the record's mapping quality is 0, meaning the read should be discarded
 */
public boolean filterOut(SAMRecord rec) {
    final int mappingQuality = rec.getMappingQuality();
    return mappingQuality == 0;
}

View File

@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out reads that are not primary alignments.
*/
public class NotPrimaryAlignmentReadFilter extends ReadFilter {
public class NotPrimaryAlignmentFilter extends ReadFilter {
/**
 * Filters out reads flagged as secondary (not primary) alignments.
 * @param read the SAM record to test
 * @return true if the read's not-primary-alignment flag is set, i.e. the read should be removed
 */
public boolean filterOut( final SAMRecord read ) {
return read.getNotPrimaryAlignmentFlag();
}

View File

@ -36,7 +36,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import javax.mail.Header;
import java.io.File;
import java.util.*;
@ -50,7 +53,7 @@ import java.util.*;
* @author depristo
*/
public class FeatureManager {
public static class FeatureDescriptor {
public static class FeatureDescriptor implements Comparable<FeatureDescriptor> {
final String name;
final FeatureCodec codec;
@ -62,6 +65,7 @@ public class FeatureManager {
public String getName() {
return name;
}
public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); }
public FeatureCodec getCodec() {
return codec;
}
@ -70,13 +74,18 @@ public class FeatureManager {
@Override
public String toString() {
return String.format("FeatureDescriptor name=%s codec=%s feature=%s", getName(), getCodecClass().getName(), getFeatureClass().getName());
return String.format("FeatureDescriptor name=%s codec=%s feature=%s",
getName(), getCodecClass().getName(), getFeatureClass().getName());
}
/**
 * Orders FeatureDescriptors alphabetically by their name, so sorted
 * collections of descriptors (e.g. TreeSet) list tracks in name order.
 * @param o the other descriptor to compare against
 * @return a negative integer, zero, or a positive integer per String.compareTo on the names
 */
@Override
public int compareTo(FeatureDescriptor o) {
return getName().compareTo(o.getName());
}
}
private final PluginManager<FeatureCodec> pluginManager;
private final Collection<FeatureDescriptor> featureDescriptors = new HashSet<FeatureDescriptor>();
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
/**
* Construct a FeatureManager
@ -114,7 +123,7 @@ public class FeatureManager {
*/
@Requires("featureClass != null")
public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
Set<FeatureDescriptor> consistentDescriptors = new HashSet<FeatureDescriptor>();
Set<FeatureDescriptor> consistentDescriptors = new TreeSet<FeatureDescriptor>();
if (featureClass == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
@ -189,10 +198,40 @@ public class FeatureManager {
*/
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures() {
List<String> names = new ArrayList<String>();
for ( final FeatureDescriptor descriptor : featureDescriptors )
names.add(descriptor.getName());
return Utils.join(",", names);
return userFriendlyListOfAvailableFeatures(Feature.class);
}
/**
 * Returns a formatted, human-readable table of the available tribble track
 * names (vcf, dbsnp, etc.) that we can load, restricted to only codecs
 * producing Features assignable to the given requiredFeatureType.
 *
 * @param requiredFeatureType only descriptors whose Feature class is assignable to this type are listed
 * @return a non-null, multi-line table with name, feature type, and documentation link columns
 */
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures(Class<? extends Feature> requiredFeatureType) {
    final String nameHeader = "Name", featureHeader = "FeatureType", docHeader = "Documentation";

    // collect only the descriptors compatible with the requested feature type,
    // preserving the (sorted) iteration order of featureDescriptors
    final List<FeatureDescriptor> compatible = new ArrayList<FeatureDescriptor>();
    for ( final FeatureDescriptor descriptor : featureDescriptors ) {
        if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) )
            compatible.add(descriptor);
    }

    // size the name and feature-type columns to their widest entry (headers included)
    int nameWidth = nameHeader.length();
    int featureWidth = featureHeader.length();
    for ( final FeatureDescriptor descriptor : compatible ) {
        nameWidth = Math.max(nameWidth, descriptor.getName().length());
        featureWidth = Math.max(featureWidth, descriptor.getSimpleFeatureName().length());
    }

    final String rowFormat = "%" + nameWidth + "s %" + featureWidth + "s %s%n";
    final StringBuilder table = new StringBuilder();
    table.append(String.format(rowFormat, nameHeader, featureHeader, docHeader));
    for ( final FeatureDescriptor descriptor : compatible ) {
        table.append(String.format(rowFormat,
                descriptor.getName(),
                descriptor.getSimpleFeatureName(),
                GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass())));
    }
    return table.toString();
}
/**

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -17,7 +17,7 @@ import java.util.Set;
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS,DataSource.REFERENCE})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {

View File

@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -44,7 +44,9 @@ import java.util.Set;
public abstract class AlleleFrequencyCalculationModel implements Cloneable {
/** Available models for computing the probability of a variant allele at a site. */
public enum Model {
/** The default model with the best performance in all cases */
EXACT,
/** For posterity we have kept around the older GRID_SEARCH model, but this gives inferior results and shouldn't be used. */
GRID_SEARCH
}

View File

@ -53,7 +53,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
}
/** Controls how the alternate allele used for genotyping is chosen. */
public enum GENOTYPING_MODE {
/** the default; the Unified Genotyper will choose the most likely alternate allele */
DISCOVERY,
/** only the alleles passed in from a VCF rod bound to the -alleles argument will be used for genotyping */
GENOTYPE_GIVEN_ALLELES
}

View File

@ -36,31 +36,54 @@ import java.io.File;
public class UnifiedArgumentCollection {
// control the various models to be used
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
/**
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
*/
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
/**
* Specifies how to determine the alternate allele to use for genotyping
*/
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Should we output confident genotypes (i.e. including ref calls) or just the variants?", required = false)
public UnifiedGenotyperEngine.OUTPUT_MODE OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
/**
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
* confidence >= this threshold are emitted as called sites. A reasonable threshold is 30 for high-pass calling (this
* is the default). Note that the confidence (QUAL) values for multi-sample low-pass (e.g. 4x per sample) calling might
* be significantly smaller with the new EXACT model than with our older GRID_SEARCH model, as the latter tended to
* over-estimate the confidence; for low-pass calling we tend to use much smaller thresholds (e.g. 4).
*/
@Argument(fullName = "standard_min_confidence_threshold_for_calling", shortName = "stand_call_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be called", required = false)
public double STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
/**
* the minimum phred-scaled Qscore threshold to emit low confidence calls. Genotypes with confidence >= this but less
* than the calling threshold are emitted but marked as filtered.
*/
@Argument(fullName = "standard_min_confidence_threshold_for_emitting", shortName = "stand_emit_conf", doc = "The minimum phred-scaled confidence threshold at which variants not at 'trigger' track sites should be emitted (and filtered if less than the calling threshold)", required = false)
public double STANDARD_CONFIDENCE_FOR_EMITTING = 30.0;
/**
* This argument is not enabled by default because it increases the runtime by an appreciable amount.
*/
@Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false)
public boolean COMPUTE_SLOD = false;
@ -80,7 +103,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "abort_at_too_much_coverage", doc = "Don't call a site if the downsampled coverage is greater than this value", required = false)
public int COVERAGE_AT_WHICH_TO_ABORT = -1;
// control the various parameters to be used
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public int MIN_BASE_QUALTY_SCORE = 17;
@ -91,11 +113,17 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
// indel-related arguments
/**
* A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site.
* Decreasing this value will increase sensitivity but at the cost of larger calling time and a larger number of false positives.
*/
@Argument(fullName = "min_indel_count_for_genotyping", shortName = "minIndelCnt", doc = "Minimum number of consensus indels required to trigger genotyping run", required = false)
public int MIN_INDEL_COUNT_FOR_GENOTYPING = 5;
/**
* This argument informs the prior probability of having an indel at a site.
*/
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
@ -126,22 +154,23 @@ public class UnifiedArgumentCollection {
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
@Hidden
@Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false)
public boolean dovit = false;
@Hidden
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
public boolean GSA_PRODUCTION_ONLY = false;
@Hidden
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
@Deprecated
@Argument(fullName="output_all_callable_bases", shortName="all_bases", doc="Please use --output_mode EMIT_ALL_SITES instead" ,required=false)
private Boolean ALL_BASES_DEPRECATED = false;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
@ -45,13 +45,73 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.*;
/**
* A variant caller which unifies the approaches of several disparate callers. Works for single-sample and
* multi-sample data. The user can choose from several different incorporated calculation models.
* A variant caller which unifies the approaches of several disparate callers -- Works for single-sample and multi-sample data.
*
* <p>
* The GATK Unified Genotyper is a multiple-sample, technology-aware SNP and indel caller. It uses a Bayesian genotype
* likelihood model to estimate simultaneously the most likely genotypes and allele frequency in a population of N samples,
* emitting an accurate posterior probability of there being a segregating variant allele at each locus as well as for the
* genotype of each sample. The system can either emit just the variant sites or complete genotypes (which includes
* homozygous reference calls) satisfying some phred-scaled confidence value. The genotyper can make accurate calls on
* both single sample data and multi-sample data.
*
* <h2>Input</h2>
* <p>
* The read data from which to make variant calls.
* </p>
*
* <h2>Output</h2>
* <p>
* A raw, unfiltered, highly specific callset in VCF format.
* </p>
*
* <h2>Example generic command for multi-sample SNP calling</h2>
* <pre>
* java -jar GenomeAnalysisTK.jar \
* -R resources/Homo_sapiens_assembly18.fasta \
* -T UnifiedGenotyper \
* -I sample1.bam [-I sample2.bam ...] \
* --dbsnp dbSNP.vcf \
* -o snps.raw.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
* -dcov [50] \
* [-L targets.interval_list]
* </pre>
*
* <p>
* The above command will call all of the samples in your provided BAM files [-I arguments] together and produce a VCF file
* with sites and genotypes for all samples. The easiest way to get the dbSNP file is from the GATK resource bundle. Several
* arguments have parameters that should be chosen based on the average coverage per sample in your data. See the detailed
* argument descriptions below.
* </p>
*
* <h2>Example command for generating calls at all sites</h2>
* <pre>
* java -jar /path/to/GenomeAnalysisTK.jar \
* -l INFO \
* -R resources/Homo_sapiens_assembly18.fasta \
* -T UnifiedGenotyper \
* -I /DCC/ftp/pilot_data/data/NA12878/alignment/NA12878.SLX.maq.SRP000031.2009_08.bam \
* -o my.vcf \
* --output_mode EMIT_ALL_SITES
* </pre>
*
* <h2>Caveats</h2>
* <ul>
* <li>The system is under active and continuous development. All outputs, the underlying likelihood model, arguments, and
* file formats are likely to change.</li>
* <li>The system can be very aggressive in calling variants. In the 1000 genomes project for pilot 2 (deep coverage of ~35x)
* we expect the raw Qscore > 50 variants to contain at least ~10% FP calls. We use extensive post-calling filters to eliminate
* most of these FPs. Variant Quality Score Recalibration is a tool to perform this filtering.</li>
* <li>We only handle diploid genotypes</li>
* </ul>
*
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableReadFilter.class} )
@ReadFilters( {BadMateFilter.class, MappingQualityUnavailableFilter.class} )
@Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
@ -61,10 +121,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
/**
* A dbSNP VCF file from which to annotate.
*
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
*/
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
* dbSNP is not used in any way for the calculations themselves.
*/
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
@ -72,7 +131,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
// control the output
/**
* A raw, unfiltered, highly specific callset in VCF format.
*/
@Output(doc="File to which variants should be written",required=true)
protected VCFWriter writer = null;
@ -82,9 +143,15 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false)
protected PrintStream metricsWriter = null;
/**
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>();
/**
* Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups.
*/
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { "Standard" };

View File

@ -51,8 +51,11 @@ public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
/** Controls which classes of sites are emitted to the output. */
public enum OUTPUT_MODE {
/** the default; emit variant sites only */
EMIT_VARIANTS_ONLY,
/** include confident reference sites */
EMIT_ALL_CONFIDENT_SITES,
/** any callable site regardless of confidence */
EMIT_ALL_SITES
}

View File

@ -178,6 +178,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
* will only proceed with the realignment (even above the given threshold) if it minimizes entropy among the reads (and doesn't simply
* push the mismatch column to another position). This parameter is just a heuristic and should be adjusted based on your particular data set.
*/
@Advanced
@Argument(fullName="entropyThreshold", shortName="entropy", doc="percentage of mismatches at a locus to be considered having high entropy", required=false)
protected double MISMATCH_THRESHOLD = 0.15;
@ -185,30 +186,35 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
* For expert users only! To minimize memory consumption you can lower this number (but then the tool may skip realignment on regions with too much coverage;
* and if the number is too low, it may generate errors during realignment). Just make sure to give Java enough memory! 4Gb should be enough with the default value.
*/
@Advanced
@Argument(fullName="maxReadsInMemory", shortName="maxInMemory", doc="max reads allowed to be kept in memory at a time by the SAMFileWriter", required=false)
protected int MAX_RECORDS_IN_MEMORY = 150000;
/**
* For expert users only!
*/
@Advanced
@Argument(fullName="maxIsizeForMovement", shortName="maxIsize", doc="maximum insert size of read pairs that we attempt to realign", required=false)
protected int MAX_ISIZE_FOR_MOVEMENT = 3000;
/**
* For expert users only!
*/
@Advanced
@Argument(fullName="maxPositionalMoveAllowed", shortName="maxPosMove", doc="maximum positional move in basepairs that a read can be adjusted during realignment", required=false)
protected int MAX_POS_MOVE_ALLOWED = 200;
/**
* For expert users only! If you need to find the optimal solution regardless of running time, use a higher number.
*/
@Advanced
@Argument(fullName="maxConsensuses", shortName="maxConsensuses", doc="max alternate consensuses to try (necessary to improve performance in deep coverage)", required=false)
protected int MAX_CONSENSUSES = 30;
/**
* For expert users only! If you need to find the optimal solution regardless of running time, use a higher number.
*/
@Advanced
@Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false)
protected int MAX_READS_FOR_CONSENSUSES = 120;
@ -216,9 +222,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
* For expert users only! If this value is exceeded at a given interval, realignment is not attempted and the reads are passed to the output file(s) as-is.
* If you need to allow more reads (e.g. with very deep coverage) regardless of memory, use a higher number.
*/
@Advanced
@Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment", required=false)
protected int MAX_READS = 20000;
@Advanced
@Argument(fullName="noOriginalAlignmentTags", shortName="noTags", required=false, doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam")
protected boolean NO_ORIGINAL_ALIGNMENT_TAGS = false;
@ -226,6 +234,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
* For expert users only! This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception.
* Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory.
*/
@Advanced
@Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false, doc="The target intervals are not sorted")
protected boolean TARGET_NOT_SORTED = false;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.BadCigarFilter;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@ -98,7 +98,7 @@ import java.util.List;
*
* @author ebanks
*/
@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, BadCigarFilter.class})
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, BadCigarFilter.class})
@Reference(window=@Window(start=-1,stop=50))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
@By(DataSource.REFERENCE)

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
@ -78,7 +78,7 @@ import java.util.*;
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
*/
@ReadFilters({Platform454Filter.class, MappingQualityZeroReadFilter.class, PlatformUnitFilter.class})
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
// @Output
// PrintStream out;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
@ -91,7 +91,7 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
@By(DataSource.READS)
// Filter out all reads with zero mapping quality
@ReadFilters({MappingQualityZeroReadFilter.class})
@ReadFilters({MappingQualityZeroFilter.class})
public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
private static final boolean DEBUG = false;

View File

@ -29,8 +29,8 @@ import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
@ -68,6 +68,8 @@ import java.util.Map;
*
* <h2>Input</h2>
* <p>
* The input read data whose base quality scores need to be assessed.
* <p>
* A database of known polymorphic sites to skip over.
* </p>
*
@ -95,7 +97,7 @@ import java.util.Map;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By( DataSource.READS ) // Only look at covered loci, not every loci of the reference file
@ReadFilters( {MappingQualityZeroReadFilter.class, MappingQualityUnavailableReadFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
@ReadFilters( {MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class} ) // Filter out all reads with zero or unavailable mapping quality
@Requires( {DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES} ) // This walker requires both -I input.bam and -R reference.fasta
@PartitionBy(PartitionType.LOCUS)
public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.CountedData, CountCovariatesWalker.CountedData> implements TreeReducible<CountCovariatesWalker.CountedData> {
@ -134,6 +136,10 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
@Argument(fullName="list", shortName="ls", doc="List the available covariates and exit", required=false)
private boolean LIST_ONLY = false;
/**
* See the -list argument to view available covariates.
*/
@Argument(fullName="covariate", shortName="cov", doc="Covariates to be used in the recalibration. Each covariate is given as a separate cov parameter. ReadGroup and ReportedQuality are required covariates and are already added for you.", required=false)
private String[] COVARIATES = null;
@Argument(fullName="standard_covs", shortName="standard", doc="Use the standard set of covariates in addition to the ones listed using the -cov argument", required=false)

View File

@ -66,6 +66,8 @@ import java.util.regex.Pattern;
*
* <h2>Input</h2>
* <p>
* The input read data whose base quality scores need to be recalibrated.
* <p>
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
* </p>
*

View File

@ -157,6 +157,10 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
*/
@Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on the optimization curve output figures. (approx 2.15 for whole genome experiments). ONLY USED FOR PLOTTING PURPOSES!", required=false)
private double TARGET_TITV = 2.15;
/**
* See the input VCF file's INFO field for a list of all available annotations.
*/
@Argument(fullName="use_annotation", shortName="an", doc="The names of the annotations which should used for calculations", required=true)
private String[] USE_ANNOTATIONS = null;

View File

@ -224,4 +224,14 @@ public class JVMUtils {
throw new StingException("Unknown type: " + type + " (" + type.getClass().getName() + ")");
}
}
/**
 * Returns the single actual type argument of a parameterized type.
 *
 * For example, given the generic type of a field declared as
 * {@code List<String>}, this returns {@code String.class}.
 *
 * @param t the generic type to inspect; must be a {@link ParameterizedType}
 *          carrying exactly one actual type argument
 * @return the {@link Class} of the single type argument
 * @throws ReviewedStingException if {@code t} is not a parameterized type,
 *         or does not have exactly one type argument
 */
public static Class getParameterizedTypeClass(Type t) {
    if ( t instanceof ParameterizedType ) {
        ParameterizedType parameterizedType = (ParameterizedType)t;
        // require exactly one type argument: 0 or >1 are equally unusable here
        if ( parameterizedType.getActualTypeArguments().length != 1 )
            throw new ReviewedStingException("BUG: expected exactly 1 generic type on class " + t);
        return (Class)parameterizedType.getActualTypeArguments()[0];
    } else
        throw new ReviewedStingException("BUG: could not find generic type on class " + t);
}
}

View File

@ -1,159 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.completegenomics;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.HashMap;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A codec for the VAR file types produced by the Complete Genomics Institute.
 *
 * Each whitespace-separated line describes one variation record. Only the
 * snp/ins/del record types are converted to VariantContext features; ref and
 * sub records (see the commented-out branches in decode) and malformed lines
 * decode to null and are skipped by the Tribble machinery.
 */
public class CGVarCodec implements FeatureCodec {
    // record-type tokens that can appear in column 7 (array index 6) of a var line
    private static final String REF_TYPE = "ref";
    private static final String SNP_TYPE = "snp";
    private static final String DELETION_TYPE = "del";
    private static final String INSERTION_TYPE = "ins";
    private static final String SUBSTITUTION_TYPE = "sub";

    // the minimum number of features in the CG file line
    private static final int minimumFeatureCount = 8;

    /**
     * decode the location only
     * @param line the input line to decode
     * @return the fully decoded VariantContext; no cheaper location-only path exists,
     *         so this simply delegates to {@link #decode(String)}
     */
    public Feature decodeLoc(String line) {
        return decode(line);
    }

    /**
     * decode the CG record
     * @param line the input line to decode
     * @return a VariantContext, or null when the line has too few fields or is
     *         of a record type this codec does not handle
     */
    public Feature decode(String line) {
        String[] array = line.split("\\s+");

        // make sure the split was successful - that we got an appropriate number of fields
        if ( array.length < minimumFeatureCount )
            return null;

        String type = array[6];
        // NOTE(review): column 5 is treated as the begin coordinate -- confirm against the CG var spec
        long start = Long.valueOf(array[4]);
        long end;
        Allele ref, alt = null;

        //System.out.println(line);

        if ( type.equals(SNP_TYPE) ) {
            // SNP: column 8 is the reference base, column 9 the alternate
            ref = Allele.create(array[7], true);
            alt = Allele.create(array[8], false);
            end = start;
        } else if ( type.equals(INSERTION_TYPE) ) {
            // insertion: no reference bases, so the ref allele is the null allele
            ref = Allele.create(Allele.NULL_ALLELE_STRING, true);
            alt = Allele.create(array[7], false);
            end = start;
        } else if ( type.equals(DELETION_TYPE) ) {
            // deletion: deleted reference bases in column 8; end spans the deletion
            ref = Allele.create(array[7], true);
            alt = Allele.create(Allele.NULL_ALLELE_STRING, false);
            end = start + ref.length();
        //} else if ( type.equals(REF_TYPE) ) {
        //    ref = Allele.create("N", true); // ref bases aren't accurate
        //    start++;
        //    end = start;
        //} else if ( type.equals(SUBSTITUTION_TYPE) ) {
        //    ref = Allele.create(array[7], true);
        //    alt = Allele.create(array[8], false);
        //    end = start + Math.max(ref.length(), alt.length());
        } else {
            return null; // we don't handle other types
        }

        HashSet<Allele> alleles = new HashSet<Allele>();
        alleles.add(ref);
        if ( alt != null )
            alleles.add(alt);

        HashMap<String, Object> attrs = new HashMap<String, Object>();
        // the last column carries external identifiers; harvest any dbSNP rsIDs
        String id = array[array.length - 1];
        if ( id.indexOf("dbsnp") != -1 ) {
            attrs.put(VariantContext.ID_KEY, parseID(id));
        }

        // create a new feature given the array
        return new VariantContext("CGI", array[3], start, end, alleles, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
    }

    /** @return VariantContext, the feature type this codec produces */
    public Class<VariantContext> getFeatureType() {
        return VariantContext.class;
    }

    // There's no spec and no character to distinguish header lines...
    private final static int NUM_HEADER_LINES = 12;

    /**
     * Header parsing is disabled (see the commented-out code) because CG var
     * files have no reliable header delimiter.
     * @return always null
     */
    public Object readHeader(LineReader reader) {
        return null;
        //String headerLine = null;
        //try {
        //    for (int i = 0; i < NUM_HEADER_LINES; i++)
        //        headerLine = reader.readLine();
        //} catch (IOException e) {
        //    throw new IllegalArgumentException("Unable to read a line from the line reader");
        //}
        //return headerLine;
    }

    // matches tokens of the form "dbsnp.<build>:<rsID>" and captures the rsID
    private static final Pattern DBSNP_PATTERN = Pattern.compile("^dbsnp\\.\\d+:(.*)");

    /**
     * Extracts the rsID portion of every semicolon-separated "dbsnp.<build>:<rsID>"
     * token in the raw ID column, re-joined with semicolons.
     * @param raw the raw identifier column
     * @return the joined rsIDs, or null if no token matched
     */
    private String parseID(String raw) {
        StringBuilder sb = null;

        String[] ids = raw.split(";");
        for ( String id : ids ) {
            Matcher matcher = DBSNP_PATTERN.matcher(id);
            if ( matcher.matches() ) {
                String rsID = matcher.group(1);
                if ( sb == null ) {
                    sb = new StringBuilder(rsID);
                } else {
                    sb.append(";");
                    sb.append(rsID);
                }
            }
        }

        return sb == null ? null : sb.toString();
    }
}

View File

@ -1,209 +0,0 @@
package org.broadinstitute.sting.utils.codecs.soapsnp;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
 * @author depristo
 * <p/>
 * a codec for parsing soapsnp files (see http://soap.genomics.org.cn/soapsnp.html#usage2)
 * <p/>
 *
 * A simple text file format with the following whitespace separated fields:
 *
 *   1)  Chromosome ID
 *   2)  Coordinate on chromosome, start from 1
 *   3)  Reference genotype
 *   4)  Consensus genotype
 *   5)  Quality score of consensus genotype
 *   6)  Best base
 *   7)  Average quality score of best base
 *   8)  Count of uniquely mapped best base
 *   9)  Count of all mapped best base
 *   10) Second best bases
 *   11) Average quality score of second best base
 *   12) Count of uniquely mapped second best base
 *   13) Count of all mapped second best base
 *   14) Sequencing depth of the site
 *   15) Rank sum test p_value
 *   16) Average copy number of nearby region
 *   17) Whether the site is a dbSNP.
 */
public class SoapSNPCodec implements FeatureCodec, NameAwareCodec {
    // we store a name to give to each of the variant contexts we emit
    private String name = "Unknown";

    /**
     * Decode the location only. No cheaper location-only path exists, so this
     * delegates to the full {@link #decode(String)}.
     */
    public Feature decodeLoc(String line) {
        return decode(line);
    }

    /**
     * Decode a line as a Feature.
     *
     * @param line one whitespace-separated SoapSNP record (18 columns)
     *
     * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is
     *         a comment)
     * @throws TribbleException if the line cannot be parsed
     */
    public Feature decode(String line) {
        try {
            // parse into tokens; held locally (not in a field) so decode carries
            // no state between calls
            String[] parts = line.trim().split("\\s+");

            // check that we got the correct number of tokens in the split
            if (parts.length != 18)
                throw new CodecLineParsingException("Invalid SoapSNP row found -- incorrect element count.  Expected 18, got " + parts.length + " line = " + line);

            String contig = parts[0];
            long start = Long.valueOf(parts[1]);

            AlleleAndGenotype allelesAndGenotype = parseAlleles(parts[2], parts[3], line);

            // column 5 is a phred-scaled consensus quality; convert to -log10(P(error))
            double negLog10PError = Integer.valueOf(parts[4]) / 10.0;

            Map<String, Object> attributes = new HashMap<String, Object>();
            attributes.put("BestBaseQ", parts[6]);
            attributes.put("SecondBestBaseQ", parts[10]);
            attributes.put("RankSumP", parts[15]);

            // add info to keys
            //System.out.printf("Alleles = " + allelesAndGenotype.alleles);
            //System.out.printf("genotype = " + allelesAndGenotype.genotype);

            VariantContext vc = new VariantContext(name, contig, start, start, allelesAndGenotype.alleles, allelesAndGenotype.genotype, negLog10PError, VariantContext.PASSES_FILTERS, attributes);

            //System.out.printf("line = %s%n", line);
            //System.out.printf("vc = %s%n", vc);

            return vc;
        } catch (CodecLineParsingException e) {
            throw new TribbleException("Unable to parse line " + line,e);
        } catch (NumberFormatException e) {
            // rethrow with the cause preserved; no stderr printing here
            throw new TribbleException("Unable to parse line " + line,e);
        }
    }

    /** Simple holder pairing the site's alleles with the single sample genotype. */
    private static class AlleleAndGenotype {
        Collection<Allele> alleles;
        Collection<Genotype> genotype;

        public AlleleAndGenotype(Collection<Allele> alleles, Genotype genotype) {
            this.alleles = alleles;
            this.genotype = new HashSet<Genotype>();
            this.genotype.add(genotype);
        }
    }

    /**
     * Converts the reference base and the IUPAC consensus genotype code into
     * site alleles and a diploid genotype.
     *
     * @param ref               the reference base (column 3)
     * @param consensusGenotype the consensus genotype code (column 4)
     * @param line              the original line, for error messages
     * @return the alleles and genotype for this site
     * @throws TribbleException if the consensus code is unexpected or equals the reference
     */
    private AlleleAndGenotype parseAlleles(String ref, String consensusGenotype, String line) {
        /* A                       Adenine
           C                       Cytosine
           G                       Guanine
           T (or U)                Thymine (or Uracil)
           R                       A or G
           Y                       C or T
           S                       G or C
           W                       A or T
           K                       G or T
           M                       A or C
           B                       C or G or T
           D                       A or G or T
           H                       A or C or T
           V                       A or C or G
           N                       any base
           . or -                  gap
        */

        // a consensus identical to the reference is not a variant site
        if ( ref.equals(consensusGenotype) )
            throw new TribbleException.InternalCodecException("Ref base and consensus genotype are the same " + ref);

        Allele refAllele = Allele.create(ref, true);

        List<Allele> genotypeAlleles = null;

        char base = consensusGenotype.charAt(0);
        switch ( base ) {
            case 'A': case 'C': case 'G': case 'T':
                // unambiguous base: homozygous alternate genotype
                Allele a = Allele.create(consensusGenotype);
                genotypeAlleles = Arrays.asList(a, a);
                break;
            case 'R': case 'Y': case 'S': case 'W': case 'K': case 'M':
                // two-base IUPAC code: heterozygous genotype
                genotypeAlleles = determineAlt(refAllele, ref.charAt(0), base);
                break;
            default:
                throw new TribbleException("Unexpected consensus genotype " + consensusGenotype + " at line = " + line);
        }

        Collection<Allele> alleles = new HashSet<Allele>(genotypeAlleles);
        alleles.add(refAllele);

        Genotype genotype = new Genotype("unknown", genotypeAlleles); // todo -- probably should include genotype quality

        return new AlleleAndGenotype( alleles, genotype );
    }

    // maps each two-base IUPAC ambiguity code to the pair of bases it denotes
    private static final Map<Character, String> IUPAC_SNPS = new HashMap<Character, String>();
    static {
        IUPAC_SNPS.put('R', "AG");
        IUPAC_SNPS.put('Y', "CT");
        IUPAC_SNPS.put('S', "GC");
        IUPAC_SNPS.put('W', "AT");
        IUPAC_SNPS.put('K', "GT");
        IUPAC_SNPS.put('M', "AC");
    }

    /**
     * Expands an IUPAC ambiguity code into the two genotype alleles, reusing
     * the reference allele object where one of the bases equals the reference.
     *
     * @throws IllegalStateException if {@code alt} is not a known two-base code
     */
    private List<Allele> determineAlt(Allele ref, char refbase, char alt) {
        String alts = IUPAC_SNPS.get(alt);
        if ( alts == null )
            throw new IllegalStateException("BUG: unexpected consensus genotype " + alt);

        Allele a1 = alts.charAt(0) == refbase ? ref : Allele.create((byte)alts.charAt(0));
        Allele a2 = alts.charAt(1) == refbase ? ref : Allele.create((byte)alts.charAt(1));
        //if ( a1 != ref && a2 != ref )
        //    throw new IllegalStateException("BUG: unexpected consensus genotype " + alt + " does not contain the reference base " + ref);

        return Arrays.asList(a1, a2);
    }

    /**
     * @return VariantContext
     */
    public Class<VariantContext> getFeatureType() {
        return VariantContext.class;
    }

    public Object readHeader(LineReader reader) {
        return null; // we don't have a meaningful header
    }

    /**
     * get the name of this codec
     * @return our set name
     */
    public String getName() {
        return name;
    }

    /**
     * set the name of this codec
     * @param name new name
     */
    public void setName(String name) {
        this.name = name;
    }

    public static void main(String[] args) {
        System.out.printf("Testing " + args[0]);
    }
}

View File

@ -39,6 +39,5 @@ public @interface DocumentedGATKFeature {
public boolean enable() default true;
public String groupName();
public String summary() default "";
public Class<? extends DocumentedGATKFeatureHandler> handler() default GenericDocumentationHandler.class;
public Class[] extraDocs() default {};
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.help;
/**
 * Documentation unit. Effectively a class version of the DocumentedGATKFeature
 * annotation, so classes that lack the annotation (e.g. statically registered
 * tribble codecs) can still be documented.
 *
 * Immutable value holder consumed by the GATK doclet.
 *
 * @author depristo
 */
class DocumentedGATKFeatureObject {
    /** the class being documented */
    private final Class classToDoc;
    /** should documentation actually be emitted for this class? */
    private final boolean enable;
    /** documentation group name and one-line summary for the index page */
    private final String groupName, summary;
    /** additional classes whose docs are folded into this unit */
    private final Class[] extraDocs;

    /**
     * @param classToDoc the class to document
     * @param enable     false to suppress documentation for this class
     * @param groupName  the documentation group this class belongs to
     * @param summary    one-line description shown on the index page
     * @param extraDocs  extra classes to document alongside; null is treated as empty
     */
    public DocumentedGATKFeatureObject(Class classToDoc, final boolean enable, final String groupName, final String summary, final Class[] extraDocs) {
        this.classToDoc = classToDoc;
        this.enable = enable;
        this.groupName = groupName;
        this.summary = summary;
        // defensive copy: later mutation of the caller's array must not
        // change this otherwise-immutable object
        this.extraDocs = extraDocs == null ? new Class[0] : extraDocs.clone();
    }

    /** Convenience constructor: enabled, with no extra docs. */
    public DocumentedGATKFeatureObject(Class classToDoc, final String groupName, final String summary) {
        this(classToDoc, true, groupName, summary, new Class[]{});
    }

    public Class getClassToDoc() { return classToDoc; }
    public boolean enable() { return enable; }
    public String groupName() { return groupName; }
    public String summary() { return summary; }
    /** @return a copy of the extra-docs classes; callers cannot mutate internal state */
    public Class[] extraDocs() { return extraDocs.clone(); }
}

View File

@ -36,9 +36,9 @@ public class GATKDocUtils {
public static String helpLinksToGATKDocs(Class c) {
String classPath = htmlFilenameForClass(c);
StringBuilder b = new StringBuilder();
b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n");
b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n");
b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n");
b.append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath);
//b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n");
//b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n");
return b.toString();
}
}

View File

@ -51,7 +51,7 @@ class GATKDocWorkUnit implements Comparable<GATKDocWorkUnit> {
/** The javadoc documentation for clazz */
final ClassDoc classDoc;
/** The annotation that lead to this Class being in GATKDoc */
final DocumentedGATKFeature annotation;
final DocumentedGATKFeatureObject annotation;
/** When was this walker built, and what's the absolute version number */
final String buildTimestamp, absoluteVersion;
@ -60,7 +60,7 @@ class GATKDocWorkUnit implements Comparable<GATKDocWorkUnit> {
Map<String, Object> forTemplate;
public GATKDocWorkUnit(String name, String filename, String group,
DocumentedGATKFeature annotation, DocumentedGATKFeatureHandler handler,
DocumentedGATKFeatureObject annotation, DocumentedGATKFeatureHandler handler,
ClassDoc classDoc, Class clazz,
String buildTimestamp, String absoluteVersion) {
this.annotation = annotation;

View File

@ -33,6 +33,7 @@ import freemarker.template.TemplateException;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.*;
@ -50,6 +51,14 @@ public class GATKDoclet {
RootDoc rootDoc;
final static Collection<DocumentedGATKFeatureObject> STATIC_DOCS = new ArrayList<DocumentedGATKFeatureObject>();
static {
STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class,
"Reference ordered data (ROD) codecs",
"Tribble codecs for reading reference ordered data such as VCF or BED files"));
}
/**
* Extracts the contents of certain types of javadoc and adds them to an XML file.
* @param rootDoc The documentation root.
@ -99,7 +108,7 @@ public class GATKDoclet {
//if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance"))
// logger.debug("foo");
DocumentedGATKFeature feature = getFeatureForClassDoc(doc);
DocumentedGATKFeatureObject feature = getFeatureForClassDoc(doc);
DocumentedGATKFeatureHandler handler = createHandler(doc, feature);
if ( handler != null && handler.includeInDocs(doc) ) {
logger.info("Generating documentation for class " + doc);
@ -146,33 +155,36 @@ public class GATKDoclet {
}
}
private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeature feature) {
try {
if ( feature != null ) {
if ( feature.enable() ) {
DocumentedGATKFeatureHandler handler = feature.handler().newInstance();
handler.setDoclet(this);
return handler;
} else {
logger.info("Skipping disabled Documentation for " + doc);
}
private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeatureObject feature) {
if ( feature != null ) {
if ( feature.enable() ) {
DocumentedGATKFeatureHandler handler = new GenericDocumentationHandler();
handler.setDoclet(this);
return handler;
} else {
logger.info("Skipping disabled Documentation for " + doc);
}
} catch ( IllegalAccessException e) {
throw new RuntimeException(e); // the constructor is now private -- this is an error
} catch ( InstantiationException e) {
throw new RuntimeException(e); // the constructor is now private -- this is an error
}
return null;
}
private DocumentedGATKFeature getFeatureForClassDoc(ClassDoc doc) {
// todo -- what do I need the ? extends Object to pass the compiler?
private DocumentedGATKFeatureObject getFeatureForClassDoc(ClassDoc doc) {
Class<? extends Object> docClass = getClassForClassDoc(doc);
if ( docClass != null && docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) {
return docClass.getAnnotation(DocumentedGATKFeature.class);
} else {
if ( docClass == null )
return null; // not annotated so it shouldn't be documented
if ( docClass.isAnnotationPresent(DocumentedGATKFeature.class) ) {
DocumentedGATKFeature f = docClass.getAnnotation(DocumentedGATKFeature.class);
return new DocumentedGATKFeatureObject(docClass, f.enable(), f.groupName(), f.summary(), f.extraDocs());
} else {
for ( DocumentedGATKFeatureObject staticDocs : STATIC_DOCS ) {
if ( staticDocs.getClassToDoc().isAssignableFrom(docClass) ) {
return new DocumentedGATKFeatureObject(docClass, staticDocs.enable(), staticDocs.groupName(), staticDocs.summary(), staticDocs.extraDocs());
}
}
return null;
}
}
@ -217,16 +229,15 @@ public class GATKDoclet {
Collections.sort(indexData);
Set<DocumentedGATKFeature> docFeatures = new HashSet<DocumentedGATKFeature>();
List<Map<String, String>> groups = new ArrayList<Map<String, String>>();
Set<String> seenDocumentationFeatures = new HashSet<String>();
List<Map<String, String>> data = new ArrayList<Map<String, String>>();
for ( GATKDocWorkUnit workUnit : indexData ) {
data.add(workUnit.indexDataMap());
docFeatures.add(workUnit.annotation);
}
List<Map<String, String>> groups = new ArrayList<Map<String, String>>();
for ( DocumentedGATKFeature feature : docFeatures ) {
groups.add(toMap(feature));
if ( ! seenDocumentationFeatures.contains(workUnit.annotation.groupName()) ) {
groups.add(toMap(workUnit.annotation));
seenDocumentationFeatures.add(workUnit.annotation.groupName());
}
}
root.put("data", data);
@ -237,7 +248,7 @@ public class GATKDoclet {
return root;
}
private static final Map<String, String> toMap(DocumentedGATKFeature annotation) {
private static final Map<String, String> toMap(DocumentedGATKFeatureObject annotation) {
Map<String, String> root = new HashMap<String, String>();
root.put("name", annotation.groupName());
root.put("summary", annotation.summary());

View File

@ -30,14 +30,18 @@ import com.sun.javadoc.FieldDoc;
import com.sun.javadoc.RootDoc;
import com.sun.javadoc.Tag;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.*;
import java.lang.reflect.Field;
import java.lang.reflect.*;
import java.util.*;
/**
@ -52,13 +56,13 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
@Override
public boolean includeInDocs(ClassDoc doc) {
return true;
// try {
// Class type = HelpUtils.getClassForDoc(doc);
// return JVMUtils.isConcrete(type);
// } catch ( ClassNotFoundException e ) {
// return false;
// }
// return true;
try {
Class type = HelpUtils.getClassForDoc(doc);
return JVMUtils.isConcrete(type);
} catch ( ClassNotFoundException e ) {
return false;
}
}
@ -107,13 +111,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
// attempt to instantiate the class
Object instance = makeInstanceIfPossible(toProcess.clazz);
Map<String, List<Object>> args = new HashMap<String, List<Object>>();
Map<String, List<Map<String, Object>>> args = new HashMap<String, List<Map<String, Object>>>();
root.put("arguments", args);
args.put("all", new ArrayList<Object>());
args.put("required", new ArrayList<Object>());
args.put("optional", new ArrayList<Object>());
args.put("hidden", new ArrayList<Object>());
args.put("depreciated", new ArrayList<Object>());
args.put("all", new ArrayList<Map<String, Object>>());
args.put("required", new ArrayList<Map<String, Object>>());
args.put("optional", new ArrayList<Map<String, Object>>());
args.put("advanced", new ArrayList<Map<String, Object>>());
args.put("hidden", new ArrayList<Map<String, Object>>());
args.put("depreciated", new ArrayList<Map<String, Object>>());
try {
for ( ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(classdoc)) ) {
ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0);
@ -123,6 +128,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
logger.debug(String.format("Processing %s", argumentSource));
String kind = "optional";
if ( argumentSource.isRequired() ) kind = "required";
else if ( argumentSource.isAdvanced() ) kind = "advanced";
else if ( argumentSource.isHidden() ) kind = "hidden";
else if ( argumentSource.isDeprecated() ) kind = "depreciated";
@ -149,11 +155,37 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
logger.debug(String.format("Skipping hidden feature %s", argumentSource));
}
}
// sort the arguments
for (Map.Entry<String,List<Map<String, Object>>> entry : args.entrySet()) {
entry.setValue(sortArguments(entry.getValue()));
}
} catch ( ClassNotFoundException e ) {
throw new RuntimeException(e);
}
}
/**
 * Sorts the per-argument data maps alphabetically by argument name
 * (case-insensitive, ignoring leading dashes); see CompareArgumentsByName.
 *
 * Note: sorts the supplied list in place and returns that same list, so the
 * caller's reference is also sorted after this returns.
 *
 * @param unsorted the argument maps to order; each must carry a "name" entry
 * @return the same list instance, now sorted
 */
private List<Map<String, Object>> sortArguments(List<Map<String, Object>> unsorted) {
    Collections.sort(unsorted, new CompareArgumentsByName());
    return unsorted;
}
private class CompareArgumentsByName implements Comparator<Map<String, Object>> {
public int compare(Map<String, Object> x, Map<String, Object> y) {
return elt(x).compareTo(elt(y));
}
private String elt(Map<String, Object> m) {
String v = m.get("name").toString().toLowerCase();
if ( v.startsWith("--") )
return v.substring(2);
else if ( v.startsWith("-") )
return v.substring(1);
else
throw new RuntimeException("Expect to see arguments beginning with at least one -, but found " + v);
}
}
private Object getFieldValue(Class c, Object instance, String fieldName) {
Field field = JVMUtils.findField(c, fieldName);
if ( field != null ) {
@ -255,20 +287,6 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
put("name", otherUnit.name);}});
}
List<Map<String, Object>> hierarchyDocs = new ArrayList<Map<String, Object>>();
for (final GATKDocWorkUnit other : all ) {
final String relation = classRelationship(toProcess.clazz, other.clazz);
if ( relation != null )
hierarchyDocs.add(
new HashMap<String, Object>(){{
put("filename", other.filename);
put("relation", relation);
put("name", other.name);}});
}
root.put("relatedDocs", hierarchyDocs);
root.put("extradocs", extraDocsData);
}
@ -309,6 +327,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
return fieldDoc;
Field field = HelpUtils.getFieldForFieldDoc(fieldDoc);
if ( field == null )
throw new RuntimeException("Could not find the field corresponding to " + fieldDoc + ", presumably because the field is inaccessible");
if ( field.isAnnotationPresent(ArgumentCollection.class) ) {
ClassDoc typeDoc = getRootDoc().classNamed(fieldDoc.type().qualifiedTypeName());
if ( typeDoc == null )
@ -333,15 +353,82 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
return null;
}
// Longest argument name we are willing to show as the primary display name.
private static final int MAX_DISPLAY_NAME = 30;

/**
 * Chooses which of the two argument names to display first.
 *
 * Normally the longer of the two names is primary and the shorter is the
 * synonym, but if the longer name exceeds MAX_DISPLAY_NAME characters the
 * order is flipped so the shorter one leads. If either name is null, the
 * other becomes the primary with no synonym.
 *
 * @param s1 one candidate name (may be null)
 * @param s2 the other candidate name (may be null)
 * @return a Pair of (primary display name, synonym or null)
 */
Pair<String, String> displayNames(String s1, String s2) {
    if ( s1 == null ) return new Pair<String, String>(s2, null);
    if ( s2 == null ) return new Pair<String, String>(s1, null);

    final boolean firstIsLonger = s1.length() > s2.length();
    final String longer  = firstIsLonger ? s1 : s2;
    final String shorter = firstIsLonger ? s2 : s1;

    if ( longer.length() > MAX_DISPLAY_NAME )
        return new Pair<String, String>(shorter, longer);
    return new Pair<String, String>(longer, shorter);
}
/**
 * Renders a reflective {@link Type} as a human-readable string for the docs,
 * e.g. a List of String becomes "List[String]" and an array of int becomes
 * "int[]".
 *
 * Parameterized types are rendered recursively as RawType[arg1,arg2,...];
 * generic arrays append "[]" to their component type; plain classes use
 * their simple name.
 *
 * @param type the (possibly generic) type of a command-line argument field
 * @return a display string describing the type
 * @throws RuntimeException if the type contains a wildcard, which arguments
 *         do not support
 * @throws StingException if the type is of an unrecognized Type subclass
 */
protected String argumentTypeString(Type type) {
    if (type instanceof ParameterizedType) {
        ParameterizedType parameterizedType = (ParameterizedType)type;
        List<String> subs = new ArrayList<String>();
        for (Type actualType: parameterizedType.getActualTypeArguments())
            subs.add(argumentTypeString(actualType));
        // Reuse the already-cast local rather than re-casting type.
        return argumentTypeString(parameterizedType.getRawType()) + "[" + Utils.join(",", subs) + "]";
    } else if (type instanceof GenericArrayType) {
        return argumentTypeString(((GenericArrayType)type).getGenericComponentType()) + "[]";
    } else if (type instanceof WildcardType) {
        throw new RuntimeException("We don't support wildcards in arguments: " + type);
    } else if (type instanceof Class<?>) {
        return ((Class<?>) type).getSimpleName();
    } else {
        throw new StingException("Unknown type: " + type);
    }
}
/**
 * Attempts to recover the Tribble {@link Feature} subclass bound by an
 * argument's generic type.
 *
 * If the type is a parameterized RodBinding, its type parameter is returned
 * directly; otherwise each type argument is searched recursively (depth
 * first, first match wins) so that e.g. List&lt;RodBinding&lt;X&gt;&gt;
 * still resolves to X.
 *
 * @param type the generic type of a command-line argument field
 * @return the bound Feature class, or null if none can be determined
 */
protected Class<? extends Feature> getFeatureTypeIfPossible(Type type) {
    if ( !(type instanceof ParameterizedType) )
        return null;

    final ParameterizedType paramType = (ParameterizedType)type;
    if ( RodBinding.class.isAssignableFrom((Class<?>)paramType.getRawType()) )
        return (Class<? extends Feature>)JVMUtils.getParameterizedTypeClass(type);

    // Not a RodBinding itself; recurse into the type arguments looking for one.
    for ( final Type nested : paramType.getActualTypeArguments() ) {
        final Class<? extends Feature> found = getFeatureTypeIfPossible(nested);
        if ( found != null )
            return found;
    }
    return null;
}
protected Map<String, Object> docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) {
Map<String, Object> root = new HashMap<String, Object>();
root.put("name", def.shortName != null ? "-" + def.shortName : "--" + def.fullName );
Pair<String, String> names = displayNames("-" + def.shortName, "--" + def.fullName);
if ( def.shortName != null && def.fullName != null )
root.put("synonyms", "--" + def.fullName);
root.put("name", names.getFirst() );
if ( names.getSecond() != null )
root.put("synonyms", names.getSecond());
root.put("required", def.required ? "yes" : "no");
root.put("type", def.argumentType.getSimpleName());
// type of the field
root.put("type", argumentTypeString(source.field.getGenericType()));
Class<? extends Feature> featureClass = getFeatureTypeIfPossible(source.field.getGenericType());
if ( featureClass != null ) {
// deal with the allowable types
FeatureManager manager = new FeatureManager();
List<String> rodTypes = new ArrayList<String>();
for (FeatureManager.FeatureDescriptor descriptor : manager.getByFeature(featureClass) ) {
rodTypes.add(String.format("<a href=%s>%s</a>",
GATKDocUtils.htmlFilenameForClass(descriptor.getCodecClass()),
descriptor.getName()));
}
root.put("rodTypes", Utils.join(", ", rodTypes));
}
// summary and fulltext
root.put("summary", def.doc != null ? def.doc : "");

View File

@ -156,12 +156,13 @@ public class ListFileUtils {
FeatureManager.FeatureDescriptor descriptor = builderForValidation.getByTriplet(triplet);
if ( descriptor == null )
throw new UserException.UnknownTribbleType(rodBinding.getTribbleType(),
String.format("Field %s had provided type %s but there's no such Tribble type. Available types are %s",
rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures()));
String.format("Field %s had provided type %s but there's no such Tribble type. The compatible types are: %n%s",
rodBinding.getName(), rodBinding.getTribbleType(), builderForValidation.userFriendlyListOfAvailableFeatures(rodBinding.getType())));
if ( ! rodBinding.getType().isAssignableFrom(descriptor.getFeatureClass()) )
throw new UserException.BadArgumentValue(rodBinding.getName(),
String.format("Field %s expected type %s, but the type of the input file provided on the command line was %s producing %s. Please make sure that you have provided the correct file type and/or that you are not binding your rod to a name matching one of the available types.",
rodBinding.getName(), rodBinding.getType(), descriptor.getName(), descriptor.getFeatureClass()));
String.format("Field %s expects Features of type %s, but the input file produces Features of type %s. The compatible types are: %n%s",
rodBinding.getName(), rodBinding.getType().getSimpleName(), descriptor.getSimpleFeatureName(),
builderForValidation.userFriendlyListOfAvailableFeatures(rodBinding.getType())));
rodBindings.add(triplet);

View File

@ -54,7 +54,8 @@ import java.util.*;
public class FeatureManagerUnitTest extends BaseTest {
private static final File RANDOM_FILE = new File(validationDataLocation + "exampleGATKReport.eval");
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
private static final File VCF4_FILE = new File(validationDataLocation + "vcf4.1.example.vcf");
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
private FeatureManager manager;
private GenomeLocParser genomeLocParser;
@ -98,7 +99,8 @@ public class FeatureManagerUnitTest extends BaseTest {
}
public String toString() {
return String.format("FMTest name=%s codec=%s feature=%s file=%s", name, codec, feature, associatedFile);
return String.format("FMTest name=%s codec=%s feature=%s file=%s",
name, codec.getSimpleName(), feature.getSimpleName(), associatedFile);
}
}
@ -106,6 +108,7 @@ public class FeatureManagerUnitTest extends BaseTest {
public Object[][] createTests() {
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
return FMTest.getTests(FMTest.class);
}

View File

@ -38,21 +38,21 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
return "-T CombineVariants -NO_HEADER -L 1:1-50,000,000 -o %s -R " + b36KGReference + args;
}
public void test1InOut(String file, String md5, boolean vcf3) {
test1InOut(file, md5, "", vcf3);
public void test1InOut(String file, String md5) {
test1InOut(file, md5, "");
}
public void test1InOut(String file, String md5, String args, boolean vcf3) {
public void test1InOut(String file, String md5, String args) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file + args),
baseTestString(" -priority v1 -V:v1 " + validationDataLocation + file + args),
1,
Arrays.asList(md5));
executeTest("testInOut1--" + file, spec);
}
public void combine2(String file1, String file2, String args, String md5, boolean vcf3) {
public void combine2(String file1, String file2, String args, String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -priority v1,v2 -V:v1,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file1 + " -V:v2,VCF" + (vcf3 ? "3 " : " ") + validationDataLocation + file2 + args),
baseTestString(" -priority v1,v2 -V:v1 " + validationDataLocation + file1 + " -V:v2 "+ validationDataLocation + file2 + args),
1,
Arrays.asList(md5));
executeTest("combine2 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
@ -78,23 +78,23 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0", true); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo", true); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null", true); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0", false); } // official project VCF files in tabix format
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b", false); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e", false); }
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f", false); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9", false); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a", false); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083", false); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c", true); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }

Binary file not shown.

View File

@ -20,16 +20,21 @@
<#macro argumentDetails arg>
<h3><a name="${arg.name}">${arg.name}<#if arg.synonyms??> / ${arg.synonyms}</#if></a>
(<#if arg.attributes??>${arg.attributes} </#if>${arg.type}<#if arg.defaultValue??> with default value ${arg.defaultValue}</#if>)</h3>
${arg.summary}. ${arg.fulltext}<br>
<p class="args">
${arg.summary}. ${arg.fulltext}
<#if arg.rodTypes??>${arg.name} binds reference ordered data. This argument supports ROD files of the
following types: ${arg.rodTypes}</#if>
<#if arg.options??>
<p>The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values:</p>
<br>
The ${arg.name} argument is an enumerated type (${arg.type}), which can have one of the following values:
<dl class="enum">
<#list arg.options as option>
<dt>${option.name}
<dd>${option.summary}
<dt class="enum">${option.name}</dt>
<dd class="enum">${option.summary}</dd>
</#list>
</dl>
</#if>
</p>
</#macro>
<#macro relatedByType name type>
@ -77,6 +82,7 @@
<tbody>
<@argumentlist name="Required" myargs=arguments.required/>
<@argumentlist name="Optional" myargs=arguments.optional/>
<@argumentlist name="Advanced" myargs=arguments.advanced/>
<@argumentlist name="Hidden" myargs=arguments.hidden/>
<@argumentlist name="Depreciated" myargs=arguments.depreciated/>
</tbody>
@ -98,7 +104,7 @@
</#if>
<#-- This class is related to other documented classes via sub/super relationships -->
<#if relatedDocs?size != 0>
<#if relatedDocs?? && relatedDocs?size != 0>
<hr>
<h2>Related capabilities</h2>
<@relatedByType name="Superclasses" type="superclass"/>

View File

@ -42,6 +42,10 @@ p.version
text-align: center;
}
p.args
{
margin-left: 3em;
}
h1, h2, h3, h4
{
@ -81,17 +85,17 @@ hr
* enum DT layout
*/
dl {
border: 1px solid #ccc;
dl.enum {
margin-left: 3em;
border: 1px dashed #ccc;
}
dt {
dt.enum {
font-weight: bold;
text-decoration: underline;
}
dd {
margin: 0;
dd.enum {
padding: 0 0 0.5em 0;
}