diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index b9e380295..9b316f077 100755 --- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -28,11 +28,13 @@ package org.broadinstitute.sting.analyzecovariates; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.text.XReadLines; import java.io.*; @@ -42,19 +44,71 @@ import java.util.Map; import java.util.regex.Pattern; /** - * Created by IntelliJ IDEA. - * User: rpoplin - * Date: Dec 1, 2009 + * Call R scripts to plot residual error versus the various covariates. * - * Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates. + *
+ * After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which + * reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically + * show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities). + * In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run + * CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated + * by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual + * error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated + * bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration. + * + *
+ * The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into + * each of the quality score estimates. It is defined as follows for N, the number of observations: + * + *
+ * NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options + * must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R, + * not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using + * this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball. + * For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout. + * + *
+ * See the GATK wiki for a tutorial and example recalibration accuracy plots. + * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration + * + *
+ * The recalibration table file in CSV format that was generated by the CountCovariates walker. + *
+ * + *+ * java -Xmx4g -jar AnalyzeCovariates.jar \ + * -recalFile /path/to/recal.table.csv \ + * -outputDir /path/to/output_dir/ \ + * -resources resources/ \ + * -ignoreQ 5 + *+ * */ +@DocumentedGATKFeature( + groupName = "AnalyzeCovariates", + summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") public class AnalyzeCovariates extends CommandLineProgram { ///////////////////////////// // Command Line Arguments ///////////////////////////// - + /** + * After the header, data records occur one per line until the end of the file. The first several items on a line are the + * values of the individual covariates and will change depending on which covariates were specified at runtime. The last + * three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches, + * and the raw empirical quality score calculated by phred-scaling the mismatch rate. + */ @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) private String RECAL_FILE = "output.recal_data.csv"; @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) @@ -67,11 +121,20 @@ public class AnalyzeCovariates extends CommandLineProgram { private int IGNORE_QSCORES_LESS_THAN = 5; @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups + + /** + * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation + * by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later. + */ @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50") private int MAX_QUALITY_SCORE = 50; + + /** + * This argument is useful for comparing before/after plots and you want the axes to match each other. + */ @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots") private int MAX_HISTOGRAM_VALUE = 0; - @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, this value will be the max value of the histogram plots") + @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting") private boolean DO_INDEL_QUALITY = false; diff --git a/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java b/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java new file mode 100644 index 000000000..9350e4a66 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/package-info.java @@ -0,0 +1,4 @@ +/** + * Package to plot residual accuracy versus error covariates for the base quality score recalibrator. + */ +package org.broadinstitute.sting.analyzecovariates; \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index 7e96b609e..7c567f511 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -34,10 +34,7 @@ import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.walkers.Attribution; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.help.ApplicationDetails; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.help.GATKDocUtils; -import org.broadinstitute.sting.utils.help.GATKDoclet; +import org.broadinstitute.sting.utils.help.*; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.util.*; @@ -178,7 +175,7 @@ public class CommandLineGATK extends CommandLineExecutable { Formatter formatter = new Formatter(additionalHelp); formatter.format("For a full description of this walker, see its GATKdocs at:%n"); - formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType)); + formatter.format("%s%n", HelpUtils.helpLinksToGATKDocs(walkerType)); return additionalHelp.toString(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java index 198f7d7d3..ba6321121 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java @@ -59,8 +59,8 @@ public class LowMemoryIntervalSharder implements Iterator
+ * A very common question about a NGS set of reads is what areas of the genome are considered callable. The system + * considers the coverage at each locus and emits either a per base state or a summary interval BED file that + * partitions the genomic intervals into the following callable states: + *
+ * A BAM file containing exactly one sample. + *
+ * + *+ *
+ * -T CallableLociWalker \ + * -I my.bam \ + * -summary my.summary \ + * -o my.bed + *+ * + * would produce a BED file (my.bed) that looks like: + * + *
+ * 20 10000000 10000864 CALLABLE + * 20 10000865 10000985 POOR_MAPPING_QUALITY + * 20 10000986 10001138 CALLABLE + * 20 10001139 10001254 POOR_MAPPING_QUALITY + * 20 10001255 10012255 CALLABLE + * 20 10012256 10012259 POOR_MAPPING_QUALITY + * 20 10012260 10012263 CALLABLE + * 20 10012264 10012328 POOR_MAPPING_QUALITY + * 20 10012329 10012550 CALLABLE + * 20 10012551 10012551 LOW_COVERAGE + * 20 10012552 10012554 CALLABLE + * 20 10012555 10012557 LOW_COVERAGE + * 20 10012558 10012558 CALLABLE + * et cetera... + *+ * as well as a summary table that looks like: + * + *
+ * state nBases + * REF_N 0 + * CALLABLE 996046 + * NO_COVERAGE 121 + * LOW_COVERAGE 928 + * EXCESSIVE_COVERAGE 0 + * POOR_MAPPING_QUALITY 2906 + *+ * + * @author Mark DePristo + * @since May 7, 2010 */ @By(DataSource.REFERENCE) public class CallableLociWalker extends LocusWalker
+ * This walker is designed to work as the first pass in a two-pass processing step. It does a by-locus traversal operating + * only at sites that are not in dbSNP. We assume that all reference mismatches we see are therefore errors and indicative + * of poor base quality. This walker generates tables based on various user-specified covariates (such as read group, + * reported quality score, cycle, and dinucleotide). Since there is a large amount of data one can then calculate an empirical + * probability of error given the particular covariates seen at this site, where p(error) = num mismatches / num observations. + * The output file is a CSV list of (the several covariate values, num observations, num mismatches, empirical quality score). + *
+ * Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified. * - * Note: ReadGroupCovariate and QualityScoreCovariate are required covariates and will be added for the user regardless of whether or not they were specified - * Note: This walker is designed to be used in conjunction with TableRecalibrationWalker. + *
+ * See the GATK wiki for a tutorial and example recalibration accuracy plots. + * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration + * + *
+ * A database of known polymorphic sites to skip over. + *
+ * + *+ * A recalibration table file in CSV format that is used by the TableRecalibration walker. + *
+ * + *+ * java -Xmx4g -jar GenomeAnalysisTK.jar \ + * -R resources/Homo_sapiens_assembly18.fasta \ + * -knownSites bundle/hg18/dbsnp_132.hg18.vcf \ + * -knownSites another/optional/setOfSitesToMask.vcf \ + * -I my_reads.bam \ + * -T CountCovariates \ + * -cov ReadGroupCovariate \ + * -cov QualityScoreCovariate \ + * -cov CycleCovariate \ + * -cov DinucCovariate \ + * -recalFile my_reads.recal_data.csv + ** - * @author rpoplin - * @since Nov 3, 2009 */ @BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN) @@ -96,8 +121,13 @@ public class CountCovariatesWalker extends LocusWalker
+ * This walker is designed to work as the second pass in a two-pass processing step, doing a by-read traversal. For each + * base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, + * cycle, and dinuc). Using these values as a key in a large hashmap the walker calculates an empirical base quality score + * and overwrites the quality score currently in the read. This walker then outputs a new bam file with these updated (recalibrated) reads. * - * For each base in each read this walker calculates various user-specified covariates (such as read group, reported quality score, cycle, and dinuc) - * Using these values as a key in a large hashmap the walker calculates an empirical base quality score and overwrites the quality score currently in the read. - * This walker then outputs a new bam file with these updated (recalibrated) reads. + *
+ * See the GATK wiki for a tutorial and example recalibration accuracy plots. + * http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration * - * Note: This walker expects as input the recalibration table file generated previously by CovariateCounterWalker. - * Note: This walker is designed to be used in conjunction with CovariateCounterWalker. + *
+ * The recalibration table file in CSV format that was generated by the CountCovariates walker. + *
+ * + *+ * A bam file in which the quality scores in each read have been recalibrated. + *
+ * + *+ * java -Xmx4g -jar GenomeAnalysisTK.jar \ + * -R resources/Homo_sapiens_assembly18.fasta \ + * -I my_reads.bam \ + * -T TableRecalibration \ + * -o my_reads.recal.bam \ + * -recalFile my_reads.recal_data.csv + ** - * @author rpoplin - * @since Nov 3, 2009 */ @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @@ -79,24 +98,54 @@ public class TableRecalibrationWalker extends ReadWalker
but rather just write them out unmodified in the recalibrated BAM file. This is useful + * because Solexa writes Q2 and Q3 bases when the machine has really gone wrong. This would be fine in and of itself, + * but when you select a subset of these reads based on their ability to align to the reference and their dinucleotide effect, + * your Q2 and Q3 bins can be elevated to Q8 or Q10, leading to issues downstream. With the default value of 5, all Q0-Q4 bases + * are unmodified during recalibration, so they don't get inappropriately evaluated. + */ + @Argument(fullName="preserve_qscores_less_than", shortName="pQ", doc="Bases with quality scores less than this threshold won't be recalibrated. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false) private int PRESERVE_QSCORES_LESS_THAN = 5; - @Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1") + + /** + * By default TableRecalibration applies a Yates' correction to account for overfitting when it calculates the empirical + * quality score, in particular, ( # mismatches + 1 ) / ( # observations + 1 ). TableRecalibration accepts a --smoothing / -sm+ * argument which sets how many unobserved counts to add to every bin. Use --smoothing 0 to turn off all smoothing or, for example, + * --smoothing 15 for a large amount of smoothing. + */ + @Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points") private int SMOOTHING = 1; - @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default=50") + + /** + * Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation + * by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later. + */ + @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores") private int MAX_QUALITY_SCORE = 50; + + /** + * By default TableRecalibration emits the OQ field -- so you can go back and look at the original quality scores, rerun + * the system using the OQ flags, etc, on the output BAM files; to turn off emission of the OQ field use this flag. + */ @Argument(fullName="doNotWriteOriginalQuals", shortName="noOQs", required=false, doc="If true, we will not write the original quality (OQ) tag for each read") private boolean DO_NOT_WRITE_OQ = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java new file mode 100755 index 000000000..fc23200af --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.validation; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.MutableVariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; + +import java.util.Map; +import java.util.Set; + +import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; + +/** + * Genotypes a dataset and validates the calls of another dataset using the Unified Genotyper. + * + * + * Genotype and Validate is a tool to evaluate the quality of a dataset for calling SNPs + * and Indels given a secondary (validation) data source. The data sources are BAM or VCF + * files. You can use them interchangeably (i.e. a BAM to validate calls in a VCF or a VCF + * to validate calls on a BAM). + *
+ * + *+ * The simplest scenario is when you have a VCF of hand annotated SNPs and Indels, and you + * want to know how well a particular technology performs calling these snps. With a + * dataset (BAM file) generated by the technology in test, and the hand annotated VCF, you + * can run GenotypeAndValidate to asses the accuracy of the calls with the new technology's + * dataset. + *
+ * + *+ * Another option is to validate the calls on a VCF file, using a deep coverage BAM file + * that you trust the calls on. The GenotypeAndValidate walker will make calls using the + * reads in the BAM file and take them as truth, then compare to the calls in the VCF file + * and produce a truth table. + *
+ * + * + *Input
+ *+ * A BAM file to make calls on and a VCF file to use as truth validation dataset. + * + * You also have the option to invert the roles of the files using the command line options listed below. + *
+ * + *Output
+ *+ * GenotypeAndValidate has two outputs. The truth table and the optional VCF file. The truth table is a + * 2x2 table correlating what was called in the dataset with the truth of the call (whether it's a true + * positive or a false positive). The table should look like this: + *
+ *+ * + * + *+ *
+ *+ * + *+ * ALT + *REF + *Predictive Value + *+ * + *called alt + *True Positive (TP) + *False Positive (FP) + *Positive PV + *+ * + *called ref + *False Negative (FN) + *True Negative (TN) + *Negative PV + *+ * The positive predictive value (PPV) is the proportion of subjects with positive test results + * who are correctly diagnosed. + *
+ *+ * The negative predictive value (NPV) is the proportion of subjects with a negative test result + * who are correctly diagnosed. + *
+ *+ * The VCF file will contain only the variants that were called or not called, excluding the ones that + * were uncovered or didn't pass the filters. This file is useful if you are trying to compare + * the PPV and NPV of two different technologies on the exact same sites (so you can compare apples to + * apples). + *
+ * + *+ * Here is an example of an annotated VCF file (info field clipped for clarity) + * + *
+ * #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 + * 1 20568807 . C T 0 HapMapHet AC=1;AF=0.50;AN=2;DP=0;GV=T GT 0/1 + * 1 22359922 . T C 282 WG-CG-HiSeq AC=2;AF=0.50;GV=T;AN=4;DP=42 GT:AD:DP:GL:GQ 1/0 ./. 0/1:20,22:39:-72.79,-11.75,-67.94:99 ./. + * 13 102391461 . G A 341 Indel;SnpCluster AC=1;GV=F;AF=0.50;AN=2;DP=45 GT:AD:DP:GL:GQ ./. ./. 0/1:32,13:45:-50.99,-13.56,-112.17:99 ./. + * 1 175516757 . C G 655 SnpCluster,WG AC=1;AF=0.50;AN=2;GV=F;DP=74 GT:AD:DP:GL:GQ ./. ./. 0/1:52,22:67:-89.02,-20.20,-191.27:99 ./. + *+ * + * + * + *Additional Details
+ *+ *
+ * + *- + * You should always use -BTI on your VCF track, so that the GATK only looks at the sites on the VCF file. + * This speeds up the process a lot. + *
+ *- + * The total number of visited bases may be greater than the number of variants in the original + * VCF file because of extended indels, as they trigger one call per new insertion or deletion. + * (i.e. ACTG/- will count as 4 genotyper calls, but it's only one line in the VCF). + *
+ *Examples
+ *+ *
- + * Genotypes BAM file from new technology using the VCF as a truth dataset: + *
+ * + *+ * java + * -jar /GenomeAnalysisTK.jar + * -T GenotypeAndValidate + * -R human_g1k_v37.fasta + * -I myNewTechReads.bam + * -alleles handAnnotatedVCF.vcf + * -BTI alleles + *+ * + *- + * Using a BAM file as the truth dataset: + *
+ * + *+ * java + * -jar /GenomeAnalysisTK.jar + * -T GenotypeAndValidate + * -R human_g1k_v37.fasta + * -I myTruthDataset.bam + * -alleles callsToValidate.vcf + * -BTI alleles + * -bt + * -o gav.vcf + *+ * + * + * @author Mauricio Carneiro + * @since ${DATE} + */ + +@Requires(value={DataSource.READS, DataSource.REFERENCE}) +@Allows(value={DataSource.READS, DataSource.REFERENCE}) + +@By(DataSource.REFERENCE) +@Reference(window=@Window(start=-200,stop=200)) + + +public class GenotypeAndValidateWalker extends RodWalkerimplements TreeReducible { + + @Output(doc="Generate a VCF file with the variants considered by the walker, with a new annotation \"callStatus\" which will carry the value called in the validation VCF or BAM file", required=false) + protected VCFWriter vcfWriter = null; + + @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype", required=true) + public RodBinding alleles; + + @Argument(fullName ="set_bam_truth", shortName ="bt", doc="Use the calls on the reads (bam file) as the truth dataset and validate the calls on the VCF", required=false) + private boolean bamIsTruth = false; + + @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false) + private int mbq = -1; + + @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false) + private double deletions = -1; + + @Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false) + private double callConf = -1; + + @Argument(fullName="standard_min_confidence_threshold_for_emitting", shortName="stand_emit_conf", doc="the minimum phred-scaled Qscore threshold to emit low confidence calls", required=false) + private double emitConf = -1; + + @Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false) + private int minDepth = -1; + + @Argument(fullName ="sample", shortName ="sn", doc="Name of the sample to validate (in case your VCF/BAM has more than one sample)", required=false) + private String sample = ""; + + private UnifiedGenotyperEngine snpEngine; + private UnifiedGenotyperEngine indelEngine; + + public static class CountedData { + private long nAltCalledAlt = 0L; + private long nAltCalledRef = 0L; + private long nRefCalledAlt = 0L; + private long nRefCalledRef = 0L; + private long nNotConfidentCalls = 0L; + private long nUncovered = 0L; + + /** + * Adds the values of other to this, returning this + * @param other the other object + */ + public void add(CountedData other) { + nAltCalledAlt += other.nAltCalledAlt; + nAltCalledRef += other.nAltCalledRef; + nRefCalledAlt += other.nRefCalledAlt; + nRefCalledRef += other.nRefCalledRef; + nUncovered += other.nUncovered; + nNotConfidentCalls += other.nNotConfidentCalls; + } + } + + + + //--------------------------------------------------------------------------------------------------------------- + // + // initialize + // + //--------------------------------------------------------------------------------------------------------------- + + public void initialize() { + + // Initialize VCF header + if (vcfWriter != null) { + Map header = VCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); + Set samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set headerLines = VCFUtils.smartMergeHeaders(header.values(), logger); + headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); + vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); + } + + // Filling in SNP calling arguments for UG + UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); + uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; + uac.alleles = alleles; + if (!bamIsTruth) uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; + if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; + if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions; + if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; + if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf; + + uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; + snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac); + + // Adding the INDEL calling arguments for UG + uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL; + indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac); + + // make sure we have callConf set to the threshold set by the UAC so we can use it later. + callConf = uac.STANDARD_CONFIDENCE_FOR_CALLING; + } + + //--------------------------------------------------------------------------------------------------------------- + // + // map + // + //--------------------------------------------------------------------------------------------------------------- + + public CountedData map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { + + final CountedData counter = new CountedData(); + + // For some reason RodWalkers get map calls with null trackers + if( tracker == null ) + return counter; + + VariantContext vcComp = tracker.getFirstValue(alleles); + if( vcComp == null ) + return counter; + + //todo - not sure I want this, may be misleading to filter extended indel events. + if (isInsideExtendedIndel(vcComp, ref)) + return counter; + + // Do not operate on variants that are not covered to the optional minimum depth + if (!context.hasReads() || (minDepth > 0 && context.getBasePileup().getBases().length < minDepth)) { + counter.nUncovered = 1L; + return counter; + } + + VariantCallContext call; + if ( vcComp.isSNP() ) + call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); + else if ( vcComp.isIndel() ) { + call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); + } + else { + logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles()); + return counter; + } + + + boolean writeVariant = true; + + if (bamIsTruth) { + if (call.confidentlyCalled) { + // If truth is a confident REF call + if (call.isVariant()) { + if (vcComp.isVariant()) + counter.nAltCalledAlt = 1L; // todo -- may wanna check if the alts called are the same? + else + counter.nAltCalledRef = 1L; + } + // If truth is a confident ALT call + else { + if (vcComp.isVariant()) + counter.nRefCalledAlt = 1L; + else + counter.nRefCalledRef = 1L; + } + } + else { + counter.nNotConfidentCalls = 1L; + writeVariant = false; + } + } + else { + if (!vcComp.hasAttribute("GV")) + throw new UserException.BadInput("Variant has no GV annotation in the INFO field. " + vcComp.getChr() + ":" + vcComp.getStart()); + + + + if (call.isCalledAlt(callConf)) { + if (vcComp.getAttribute("GV").equals("T")) + counter.nAltCalledAlt = 1L; + else + counter.nRefCalledAlt = 1L; + } + else if (call.isCalledRef(callConf)) { + if (vcComp.getAttribute("GV").equals("T")) + counter.nAltCalledRef = 1L; + else + counter.nRefCalledRef = 1L; + } + else { + counter.nNotConfidentCalls = 1L; + writeVariant = false; + } + } + + if (vcfWriter != null && writeVariant) { + if (!vcComp.hasAttribute("callStatus")) { + MutableVariantContext mvc = new MutableVariantContext(vcComp); + mvc.putAttribute("callStatus", call.isCalledAlt(callConf) ? "ALT" : "REF" ); + vcfWriter.add(mvc); + } + else + vcfWriter.add(vcComp); + } + return counter; + } + + //--------------------------------------------------------------------------------------------------------------- + // + // reduce + // + //--------------------------------------------------------------------------------------------------------------- + + public CountedData reduceInit() { + return new CountedData(); + } + + public CountedData treeReduce( final CountedData sum1, final CountedData sum2) { + sum2.add(sum1); + return sum2; + } + + public CountedData reduce( final CountedData mapValue, final CountedData reduceSum ) { + reduceSum.add(mapValue); + return reduceSum; + } + + public void onTraversalDone( CountedData reduceSum ) { + double ppv = 100 * ((double) reduceSum.nAltCalledAlt /( reduceSum.nAltCalledAlt + reduceSum.nRefCalledAlt)); + double npv = 100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nAltCalledRef)); + double sensitivity = 100 * ((double) reduceSum.nAltCalledAlt /( reduceSum.nAltCalledAlt + reduceSum.nAltCalledRef)); + double specificity = (reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt > 0) ? 100 * ((double) reduceSum.nRefCalledRef /( reduceSum.nRefCalledRef + reduceSum.nRefCalledAlt)) : 100; + logger.info(String.format("Resulting Truth Table Output\n\n" + + "---------------------------------------------------\n" + + "\t\t|\tALT\t|\tREF\t\n" + + "---------------------------------------------------\n" + + "called alt\t|\t%d\t|\t%d\n" + + "called ref\t|\t%d\t|\t%d\n" + + "---------------------------------------------------\n" + + "positive predictive value: %f%%\n" + + "negative predictive value: %f%%\n" + + "---------------------------------------------------\n" + + "sensitivity: %f%%\n" + + "specificity: %f%%\n" + + "---------------------------------------------------\n" + + "not confident: %d\n" + + "not covered: %d\n" + + "---------------------------------------------------\n", reduceSum.nAltCalledAlt, reduceSum.nRefCalledAlt, reduceSum.nAltCalledRef, reduceSum.nRefCalledRef, ppv, npv, sensitivity, specificity, reduceSum.nNotConfidentCalls, reduceSum.nUncovered)); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java index 7653f511f..61149e5d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java @@ -14,9 +14,8 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.gatk.walkers.DataSource; -import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.BaseUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index b52863c8f..f6b6a8d65 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -191,16 +191,28 @@ public class SelectVariants extends RodWalker { protected VCFWriter vcfWriter = null; @Argument(fullName="sample_name", shortName="sn", doc="Include genotypes from this sample. Can be specified multiple times", required=false) - public Set sampleNames; + public Set sampleNames = new HashSet (0); @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false) - public Set sampleExpressions; + public Set sampleExpressions ; @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) public Set sampleFiles; /** - * Note that thse expressions are evaluated *after* the specified samples are extracted and the INFO field annotations are updated. + * Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. + */ + @Argument(fullName="exclude_sample_name", shortName="xl_sn", doc="Exclude genotypes from this sample. Can be specified multiple times", required=false) + public Set XLsampleNames = new HashSet (0); + + /** + * Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. + */ + @Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) + public Set XLsampleFiles; + + /** + * Note that these expressions are evaluated *after* the specified samples are extracted and the INFO field annotations are updated. */ @Argument(shortName="select", doc="One or more criteria to use when selecting the data", required=false) public ArrayList SELECT_EXPRESSIONS = new ArrayList (); @@ -304,8 +316,7 @@ public class SelectVariants extends RodWalker { private ArrayList afBoosts = null; double bkDelta = 0.0; - - private PrintStream outMVFileStream = null; + private PrintStream outMVFileStream = null; /** @@ -321,19 +332,27 @@ public class SelectVariants extends RodWalker { Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); + // first, add any requested samples samples.addAll(samplesFromFile); samples.addAll(samplesFromExpressions); - if (sampleNames != null) - samples.addAll(sampleNames); + samples.addAll(sampleNames); - if(samples.isEmpty()) { + // if none were requested, we want all of them + if ( samples.isEmpty() ) { samples.addAll(vcfSamples); NO_SAMPLES_SPECIFIED = true; } - for (String sample : samples) { + // now, exclude any requested samples + Collection XLsamplesFromFile = SampleUtils.getSamplesFromFiles(XLsampleFiles); + samples.removeAll(XLsamplesFromFile); + samples.removeAll(XLsampleNames); + + if ( samples.size() == 0 ) + throw new UserException("All samples requested to be included were also requested to be excluded."); + + for ( String sample : samples ) logger.info("Including sample '" + sample + "'"); - } // Initialize VCF header Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 61851abe2..12e62a249 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.refdata.features.DbSNPHelper; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; @@ -120,7 +119,7 @@ public class VariantsToVCF extends RodWalker { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - String rsID = dbsnp == null ? null : DbSNPHelper.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP); + String rsID = dbsnp == null ? null : VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP); Collection contexts = getVariantContexts(tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java index 5e536d4c1..e328c9286 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata.features.beagle; +package org.broadinstitute.sting.utils.codecs.beagle; /* * Copyright (c) 2010 The Broad Institute * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java similarity index 97% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java index e6832754d..0aa9ecba2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleFeature.java @@ -22,7 +22,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.features.beagle; +package org.broadinstitute.sting.utils.codecs.beagle; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.variantcontext.Allele; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java index fef6c4ea0..c30da828e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/completegenomics/CGVarCodec.java @@ -117,7 +117,7 @@ public class CGVarCodec implements FeatureCodec { return new VariantContext("CGI", array[3], start, end, alleles, VariantContext.NO_NEG_LOG_10PERROR, null, attrs); } - public Class getFeatureType() { + public Class getFeatureType() { return VariantContext.class; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java index 90878dee7..535f607a1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java @@ -82,7 +82,7 @@ public class RawHapMapCodec implements FeatureCodec { headerLine); } - public Class getFeatureType() { + public Class getFeatureType() { return RawHapMapFeature.class; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index cec40b5bd..391715c63 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -1,6 +1,5 @@ -package org.broadinstitute.sting.gatk.refdata.features.refseq; +package org.broadinstitute.sting.utils.codecs.refseq; -import org.apache.commons.io.filefilter.FalseFileFilter; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.readers.LineReader; @@ -104,7 +103,7 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec getFeatureType() { return RefSeqFeature.class; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqFeature.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqFeature.java index a38d45428..c04ca8592 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqFeature.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata.features.refseq; +package org.broadinstitute.sting.utils.codecs.refseq; import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/Transcript.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/refseq/Transcript.java index d8bf12810..3e8a4fb34 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/Transcript.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/Transcript.java @@ -1,53 +1,53 @@ -package org.broadinstitute.sting.gatk.refdata.features.refseq; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.HasGenomeLocation; - -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * User: asivache - * Date: Sep 22, 2009 - * Time: 5:22:30 PM - * To change this template use File | Settings | File Templates. - */ -public interface Transcript extends HasGenomeLocation { - - /** Returns id of the transcript (RefSeq NM_* id) */ - public String getTranscriptId(); - /** Returns coding strand of the transcript, 1 or -1 for positive or negative strand, respectively */ - public int getStrand(); - /** Returns transcript's full genomic interval (includes all exons with UTRs) */ - public GenomeLoc getLocation(); - /** Returns genomic interval of the coding sequence (does not include - * UTRs, but still includes introns, since it's a single interval on the DNA) - */ - public GenomeLoc getCodingLocation(); - /** Name of the gene this transcript corresponds to (typically NOT gene id such as Entrez etc, - * but the implementation can decide otherwise) - */ - public String getGeneName(); - /** Number of exons in this transcript */ - public int getNumExons(); - /** Genomic location of the n-th exon; expected to throw an exception (runtime) if n is out of bounds */ - public GenomeLoc getExonLocation(int n); - - /** Returns the list of all exons in this transcript, as genomic intervals */ - public List getExons(); - - /** Returns true if the specified interval 'that' overlaps with the full genomic interval of this transcript */ - public boolean overlapsP (GenomeLoc that); - - /** Returns true if the specified interval 'that' overlaps with the coding genomic interval of this transcript. - * NOTE: since "coding interval" is still a single genomic interval, it will not contain UTRs of the outermost exons, - * but it will still contain introns and/or exons internal to this genomic locus that are not spliced into this transcript. - * @see #overlapsExonP - */ - public boolean overlapsCodingP (GenomeLoc that); - - /** Returns true if the specified interval 'that' overlaps with any of the exons actually spliced into this transcript */ - public boolean overlapsExonP (GenomeLoc that); - - -} +package org.broadinstitute.sting.utils.codecs.refseq; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.HasGenomeLocation; + +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: asivache + * Date: Sep 22, 2009 + * Time: 5:22:30 PM + * To change this template use File | Settings | File Templates. + */ +public interface Transcript extends HasGenomeLocation { + + /** Returns id of the transcript (RefSeq NM_* id) */ + public String getTranscriptId(); + /** Returns coding strand of the transcript, 1 or -1 for positive or negative strand, respectively */ + public int getStrand(); + /** Returns transcript's full genomic interval (includes all exons with UTRs) */ + public GenomeLoc getLocation(); + /** Returns genomic interval of the coding sequence (does not include + * UTRs, but still includes introns, since it's a single interval on the DNA) + */ + public GenomeLoc getCodingLocation(); + /** Name of the gene this transcript corresponds to (typically NOT gene id such as Entrez etc, + * but the implementation can decide otherwise) + */ + public String getGeneName(); + /** Number of exons in this transcript */ + public int getNumExons(); + /** Genomic location of the n-th exon; expected to throw an exception (runtime) if n is out of bounds */ + public GenomeLoc getExonLocation(int n); + + /** Returns the list of all exons in this transcript, as genomic intervals */ + public List getExons(); + + /** Returns true if the specified interval 'that' overlaps with the full genomic interval of this transcript */ + public boolean overlapsP (GenomeLoc that); + + /** Returns true if the specified interval 'that' overlaps with the coding genomic interval of this transcript. + * NOTE: since "coding interval" is still a single genomic interval, it will not contain UTRs of the outermost exons, + * but it will still contain introns and/or exons internal to this genomic locus that are not spliced into this transcript. + * @see #overlapsExonP + */ + public boolean overlapsCodingP (GenomeLoc that); + + /** Returns true if the specified interval 'that' overlaps with any of the exons actually spliced into this transcript */ + public boolean overlapsExonP (GenomeLoc that); + + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java index 43e2c3ff5..f4048d37d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.features.sampileup; +package org.broadinstitute.sting.utils.codecs.sampileup; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; @@ -35,7 +35,7 @@ import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; -import static org.broadinstitute.sting.gatk.refdata.features.sampileup.SAMPileupFeature.VariantType; +import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.VariantType; /** * A Tribble encoder / decoder for SAM pileup data. diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupFeature.java similarity index 99% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupFeature.java index 378f26934..eb33243e3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/SAMPileupFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupFeature.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.features.sampileup; +package org.broadinstitute.sting.utils.codecs.sampileup; import net.sf.samtools.util.StringUtil; import org.broad.tribble.Feature; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java index 039b8adde..f6861e585 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.features.samread; +package org.broadinstitute.sting.utils.codecs.samread; import net.sf.samtools.Cigar; import net.sf.samtools.TextCigarCodec; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadFeature.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadFeature.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadFeature.java index 7f12b2b2f..fc1bf89af 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/samread/SAMReadFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadFeature.java @@ -22,7 +22,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.gatk.refdata.features.samread; +package org.broadinstitute.sting.utils.codecs.samread; import org.broad.tribble.Feature; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index eada8521f..b5efb49a7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -222,7 +222,7 @@ public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec { return null; } - public Class getFeatureType() { + public Class getFeatureType() { return SnpEffFeature.class; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java index e169dbdfc..284c43e90 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/soapsnp/SoapSNPCodec.java @@ -178,7 +178,7 @@ public class SoapSNPCodec implements FeatureCodec, NameAwareCodec { /** * @return VariantContext */ - public Class getFeatureType() { + public Class getFeatureType() { return VariantContext.class; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java index 745ccdd9f..6fe1907e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/BedTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata.features.table; +package org.broadinstitute.sting.utils.codecs.table; import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java index ab1ac59d8..2ce7c679e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata.features.table; +package org.broadinstitute.sting.utils.codecs.table; import org.broad.tribble.Feature; import org.broad.tribble.readers.LineReader; @@ -51,7 +51,7 @@ public class TableCodec implements ReferenceDependentFeatureCodec { } @Override - public Class getFeatureType() { + public Class getFeatureType() { return TableFeature.class; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java similarity index 96% rename from public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java rename to public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java index ca73ee960..a85849f0b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/features/table/TableFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableFeature.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.gatk.refdata.features.table; +package org.broadinstitute.sting.utils.codecs.table; import org.broad.tribble.Feature; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index cb505c717..19f58ddaa 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -263,7 +263,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, * * @return the type of record */ - public Class getFeatureType() { + public Class getFeatureType() { return VariantContext.class; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index f43891e77..c0a04c81f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -180,4 +180,19 @@ public class VCFUtils { return new HashSet (map.values()); } + + public static String rsIDOfFirstRealVariant(List VCs, VariantContext.Type type) { + if ( VCs == null ) + return null; + + String rsID = null; + for ( VariantContext vc : VCs ) { + if ( vc.getType() == type ) { + rsID = vc.getID(); + break; + } + } + + return rsID; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index ce03c8093..214a1716a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -71,7 +71,7 @@ public abstract class DocumentedGATKFeatureHandler { * @return */ public String getDestinationFilename(ClassDoc doc, Class clazz) { - return GATKDocUtils.htmlFilenameForClass(clazz); + return HelpUtils.htmlFilenameForClass(clazz); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java deleted file mode 100644 index 8efeecd7b..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.help; - -/** - * @author depristo - * @since 8/8/11 - */ -public class GATKDocUtils { - private final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; - private final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; - private final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; - - public static String htmlFilenameForClass(Class c) { - return c.getName().replace(".", "_") + ".html"; - } - - public static String helpLinksToGATKDocs(Class c) { - String classPath = htmlFilenameForClass(c); - StringBuilder b = new StringBuilder(); - b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); - b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); - b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); - return b.toString(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java index 4527c6afe..da4e7bdaf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java @@ -32,6 +32,10 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils; import java.lang.reflect.Field; public class HelpUtils { + public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; + public final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; + public final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; + protected static boolean implementsInterface(ProgramElementDoc classDoc, Class... interfaceClasses) { for (Class interfaceClass : interfaceClasses) if (assignableToClass(classDoc, interfaceClass, false)) @@ -74,4 +78,17 @@ public class HelpUtils { String.format("%s.%s", containingPackage.name(), doc.name()) : String.format("%s", doc.name()); } + + public static String htmlFilenameForClass(Class c) { + return c.getName().replace(".", "_") + ".html"; + } + + public static String helpLinksToGATKDocs(Class c) { + String classPath = htmlFilenameForClass(c); + StringBuilder b = new StringBuilder(); + b.append("release version: ").append(URL_ROOT_FOR_RELEASE_GATKDOCS).append(classPath).append("\n"); + b.append("stable version: ").append(URL_ROOT_FOR_STABLE_GATKDOCS).append(classPath).append("\n"); + b.append("unstable version: ").append(URL_ROOT_FOR_UNSTABLE_GATKDOCS).append(classPath).append("\n"); + return b.toString(); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index ca3399c78..dacc2d94a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1080,8 +1080,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati } public void validateReferenceBases(Allele reference, Byte paddedRefBase) { - // don't validate if we're an insertion - if ( !reference.isNull() && !reference.basesMatch(getReference()) ) { + // don't validate if we're an insertion or complex event + if ( !reference.isNull() && getReference().length() == 1 && !reference.basesMatch(getReference()) ) { throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); } diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 79e9172dd..013a37a88 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.commandline; import org.broad.tribble.Feature; -import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.testng.Assert; @@ -35,7 +34,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import javax.script.Bindings; import java.util.List; import java.util.EnumSet; /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java index f782580e2..85ae1e1f7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReferenceOrderedViewUnitTest.java @@ -10,7 +10,7 @@ import org.testng.Assert; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java index bd4f93d24..d45f6e667 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/rmd/ReferenceOrderedDataPoolUnitTest.java @@ -4,7 +4,7 @@ import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.testng.Assert; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java index fbd30bc8a..1e39fd26f 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/RefMetaDataTrackerUnitTest.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java index 5d662ffed..90262b9c1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManagerUnitTest.java @@ -29,8 +29,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.features.table.BedTableCodec; -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature; +import org.broadinstitute.sting.utils.codecs.table.BedTableCodec; +import org.broadinstitute.sting.utils.codecs.table.TableFeature; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index bec0d5dd4..cec377f5f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -7,7 +7,7 @@ import java.util.Arrays; public class SelectVariantsIntegrationTest extends WalkerTest { public static String baseTestString(String args) { - return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s" + args; + return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s -NO_HEADER" + args; } @Test @@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile + " -NO_HEADER"), + baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile), 1, Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") ); @@ -24,12 +24,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testComplexSelection--" + testfile, spec); } + @Test + public void testSampleExclusion() { + String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; + String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant:VCF3 " + testfile, + 1, + Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd") + ); + + executeTest("testSampleExclusion--" + testfile, spec); + } + @Test public void testRepeatedLineSelection() { String testfile = validationDataLocation + "test.dup.vcf"; WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile + " -NO_HEADER"), + baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile), 1, Arrays.asList("b74038779fe6485dbb8734ae48178356") ); diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css index 1d7bcc576..2abcf0397 100644 --- a/settings/helpTemplates/style.css +++ b/settings/helpTemplates/style.css @@ -95,6 +95,12 @@ dd { padding: 0 0 0.5em 0; } +pre { + border: thin solid lightgray; + margin-left: 1em; + margin-right: 4em; + background-color: #e0fdff; +} /* * clean table layouts */ @@ -128,6 +134,48 @@ dd { } th#row-divider +{ + font-weight: bolder; + font-size: larger; +} + + +/* + * Table design for input/ouptut description + */ + +#description-table +{ + font-family: "Lucida Sans Unicode", "Lucida Grande", Sans-Serif; + font-size: 12px; + background: #fff; + margin: 5px; + border-collapse: collapse; + text-align: left; +} +#description-table th +{ + font-size: 16px; + font-weight: bold; + background-color: lightgray; + color: #039; + text-align: center; + padding: 10px 8px; + border-bottom: 2px solid #6678b1; +} +#description-table td +{ + border-bottom: 1px solid #ccc; + color: #669; + padding: 6px 8px; + text-align: right; +} +#description-table tbody tr:hover td +{ + color: #009; +} + +th#row-divider { font-weight: bolder; font-size: larger;