From 198955f7522e8e11dd73dae719a68fa1e05fc4ec Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 09:57:21 -0400 Subject: [PATCH 01/11] GATKDoc descriptions for all standard codecs, or TODO for their owners -- Also added vcf.gz support in the VCF codec. This wasn't committed in the last round, because it was missed by the parallel documentation effort. --- build.xml | 2 +- .../utils/codecs/beagle/BeagleCodec.java | 23 +++++++++++ .../utils/codecs/hapmap/RawHapMapCodec.java | 41 ++++++++++++++++--- .../utils/codecs/refseq/RefSeqCodec.java | 18 +++++++- .../codecs/sampileup/SAMPileupCodec.java | 39 ++++++++++++++++-- .../utils/codecs/samread/SAMReadCodec.java | 17 +++++++- .../utils/codecs/snpEff/SnpEffCodec.java | 11 ++++- .../utils/codecs/table/BedTableCodec.java | 21 ++++++---- .../sting/utils/codecs/table/TableCodec.java | 37 ++++++++++++++--- .../utils/codecs/vcf/AbstractVCFCodec.java | 21 +++++++--- .../sting/utils/codecs/vcf/VCF3Codec.java | 16 +++++++- .../sting/utils/codecs/vcf/VCFCodec.java | 40 ++++++++++++++++-- 12 files changed, 249 insertions(+), 37 deletions(-) diff --git a/build.xml b/build.xml index 85955d774..d8c38738a 100644 --- a/build.xml +++ b/build.xml @@ -49,7 +49,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java index e328c9286..413848543 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java @@ -40,6 +40,29 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.regex.Pattern; +/** + * TODO GUILLERMO DEL ANGEL + * + *

+ * Codec Description + *

+ * + *

+ * See also: @see VCF specification
+ *

+ + *

+ * + *

File format example

+ *
+ *     line 1
+ *     line 2
+ *     line 3
+ * 
+ * + * @author Mark DePristo + * @since 2010 + */ public class BeagleCodec implements ReferenceDependentFeatureCodec { private String[] header; public enum BeagleReaderType {PROBLIKELIHOOD, GENOTYPES, R2}; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java index 535f607a1..a80e05d59 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/hapmap/RawHapMapCodec.java @@ -33,12 +33,43 @@ import java.io.IOException; import java.util.Arrays; /** - * a codec for the file types produced by the HapMap consortium, available on their website: - * http://hapmap.ncbi.nlm.nih.gov/downloads/genotypes/ + * A codec for the file types produced by the HapMap consortium * - * The format includes eleven standard fields, plus genotypes for each of the samples included - * in the file - * + *

+ * The format includes eleven standard fields, plus genotypes for each of the samples included + * in the file: + * + *

+ *     Col1: refSNP rs# identifier at the time of release (NB might merge with another rs# in the future)
+ *     Col2: SNP alleles according to dbSNP
+ *     Col3: chromosome that SNP maps to
+ *     Col4: chromosome position of SNP, in basepairs on reference sequence
+ *     Col5: strand of reference sequence that SNP maps to
+ *     Col6: version of reference sequence assembly
+ *     Col7: HapMap genotype center that produced the genotypes
+ *     Col8: LSID for HapMap protocol used for genotyping
+ *     Col9: LSID for HapMap assay used for genotyping
+ *     Col10: LSID for panel of individuals genotyped
+ *     Col11: QC-code, currently 'QC+' for all entries (for future use)
+ *     Col12 and on: observed genotypes of samples, one per column, sample identifiers in column headers (Coriell catalog numbers, example: NA10847). Duplicate samples have .dup suffix.
+ * 
+ *

+ * + *

+ * See also: @see HapMap genotypes download + *

+ * + *

File format example

+ * From genotypes_chr1_ASW_r27_nr.b36_fwd.txt.gz: + *
+ *     rs# alleles chrom pos strand assembly# center protLSID assayLSID panelLSID QCcode NA19625 NA19700 NA19701 NA19702 NA19703 NA19704 NA19705 NA19708 NA19712 NA19711 NA19818 NA19819 NA19828 NA19835 NA19834 NA19836 NA19902 NA19901 NA19900 NA19904 NA19919 NA19908 NA19909 NA19914 NA19915 NA19916 NA19917 NA19918 NA19921 NA20129 NA19713 NA19982 NA19983 NA19714 NA19985 NA20128 NA20126 NA20127 NA20277 NA20276 NA20279 NA20282 NA20281 NA20284 NA20287 NA20288 NA20290 NA20289 NA20291 NA20292 NA20295 NA20294 NA20297 NA20300 NA20301 NA20302 NA20317 NA20319 NA20322 NA20333 NA20332 NA20335 NA20334 NA20337 NA20336 NA20340 NA20341 NA20343 NA20342 NA20344 NA20345 NA20346 NA20347 NA20348 NA20349 NA20350 NA20357 NA20356 NA20358 NA20359 NA20360 NA20363 NA20364
+ *     rs9629043 C/T chr1 554636 + ncbi_b36 broad urn:LSID:affymetrix.hapmap.org:Protocol:GenomeWideSNP_6.0:3 urn:LSID:broad.hapmap.org:Assay:SNP_A-8575115:3 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:3 QC+ CC CC CC CC CC CC CC CC CC CC CC CC NN CC CC CC CT CT CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CT CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC CC
+ *     rs28446478 G/T chr1 576058 + ncbi_b36 sanger urn:LSID:illumina.hapmap.org:Protocol:Human_1M_BeadChip:3 urn:LSID:sanger.hapmap.org:Assay:H1Mrs28446478:3 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:3 QC+ GT TT GT TT TT TT TT GT GT TT TT TT TT GT GT GT GT TT GT TT GT GT TT GT GT TT TT TT GT GT TT TT TT GT TT GT TT GT GT GT GT GT TT GT TT TT GT GT TT TT TT TT TT TT GT GT GT GT TT TT TT TT GT TT GT TT TT GT TT TT TT GT TT TT TT GT GT TT GT TT GT TT TT
+ *     rs12565286 C/G chr1 711153 + ncbi_b36 broad urn:LSID:affymetrix.hapmap.org:Protocol:GenomeWideSNP_6.0:3 urn:LSID:broad.hapmap.org:Assay:SNP_A-8709646:3 urn:lsid:dcc.hapmap.org:Panel:US_African-30-trios:3 QC+ GG GG GG GG GG GG GG GG CG GG GG GG GG GG GG GG GG GG GG CG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG GG CG GG GG GG GG GG GG GG CG CG GG GG GG GG GG GG GG GG GG CG CG GG GG GG GG GG GG GG GG GG GG CG NN GG GG GG GG GG GG NN GG NN NN
+ * 
+ * + * @author Mark DePristo + * @since 2010 */ public class RawHapMapCodec implements FeatureCodec { // the minimum number of features in the HapMap file line diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java index 391715c63..d94d9ff84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/refseq/RefSeqCodec.java @@ -12,7 +12,23 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.ArrayList; /** - * the ref seq codec + * TODO FOR CHRIS HARTL + * + *

+ * Codec Description + *

+ * + *

+ * See also: link to file specification + *

+ * + *

File format example

+ *

+ * A BAM file containing exactly one sample. + *

+ * + * @author Mark DePristo + * @since 2010 */ public class RefSeqCodec implements ReferenceDependentFeatureCodec { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java index f4048d37d..f4633b2ce 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/sampileup/SAMPileupCodec.java @@ -38,10 +38,43 @@ import java.util.regex.Pattern; import static org.broadinstitute.sting.utils.codecs.sampileup.SAMPileupFeature.VariantType; /** - * A Tribble encoder / decoder for SAM pileup data. + * Decoder for SAM pileup data. For GATK validation purposes only * - * @author mhanna - * @version 0.1 + *

+ * Pileup format is first used by Tony Cox and Zemin Ning at the Sanger Institute. + * It describes the base-pair information at each chromosomal position. This format + * facilitates SNP/indel calling and brief alignment viewing by eyes. + *

+ *

+ * Each line consists of chromosome, 1-based coordinate, reference base, the + * number of reads covering the site, read bases and base qualities. At the + * read base column, a dot stands for a match to the reference base on the + * forward strand, a comma for a match on the reverse strand, `ACGTN' for a mismatch + * on the forward strand and `acgtn' for a mismatch on the reverse strand. + * A pattern `\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between + * this reference position and the next reference position. The length of the + * insertion is given by the integer in the pattern, followed by the inserted sequence. + *

+ * + *

+ *
See also: @see SAMTools project
+ *
See also: @see Pileup format
+ *

+ * + *

File format example

+ *
+ *     seq1 272 T 24  ,.$.....,,.,.,...,,,.,..^+. <<<+;<<<<<<<<<<<=<;<;7<&
+ *     seq1 273 T 23  ,.....,,.,.,...,,,.,..A <<<;<<<<<<<<<3<=<<<;<<+
+ *     seq1 274 T 23  ,.$....,,.,.,...,,,.,...    7<7;<;<<<<<<<<<=<;<;<<6
+ *     seq1 275 A 23  ,$....,,.,.,...,,,.,...^l.  <+;9*<<<<<<<<<=<<:;<<<<
+ *     seq1 276 G 22  ...T,,.,.,...,,,.,....  33;+<<7=7<<7<&<<1;<<6<
+ *     seq1 277 T 22  ....,,.,.,.C.,,,.,..G.  +7<;<<<<<<<&<=<<:;<<&<
+ *     seq1 278 G 23  ....,,.,.,...,,,.,....^k.   %38*<<;<7<<7<=<<<;<<<<<
+ *     seq1 279 C 23  A..T,,.,.,...,,,.,..... ;75&<<<<<<<<<=<<<9<<:<<
+ * 
+ * + * @author Matt Hanna + * @since 2009 */ public class SAMPileupCodec implements FeatureCodec { // the number of tokens we expect to parse from a pileup line diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java index f6861e585..d4bdb5aa9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/samread/SAMReadCodec.java @@ -36,8 +36,21 @@ import org.broad.tribble.util.ParsingUtils; /** * Decodes a simple SAM text string. * - * @author mhanna - * @version 0.1 + *

+ * Reads in the SAM text version of a BAM file as a ROD. For testing only + *

+ * + *

+ * See also: @see SAMTools for format specification + *

+ * + *

File format example

+ *
+ *     SL-XBC:1:10:628:923#0	16	Escherichia_coli_K12	1	37	76M	=	1	0	AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA	B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB
+ * 
+ * + * @author Matt Hanna + * @since 2009 */ public class SAMReadCodec implements FeatureCodec { /* SL-XBC:1:10:628:923#0 16 Escherichia_coli_K12 1 37 76M = 1 0 AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGCTTCTGA B@>87<;A@?@957:>>@AA@B>@A9AB@B>@A@@@@@A;=AAB@BBBBBCBBBB@>A>:ABB@BAABCB=CA@CB */ diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index b5efb49a7..7f3d9e17d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -41,10 +41,11 @@ import java.io.IOException; /** * Codec for decoding the output format of the SnpEff variant effect predictor tool - * (http://snpeff.sourceforge.net/). * + *

* This format has 23 tab-delimited fields: * + *

  * Chromosome
  * Position
  * Reference
@@ -68,10 +69,16 @@ import java.io.IOException;
  * Codons Around
  * Amino Acids Around
  * Custom Interval ID
+ * 
+ * Note that we treat all except the Chromosome, Position, and Effect fields as optional. + *

* - * We treat all except the Chromosome, Position, and Effect fields as optional. + *

+ * See also: @see SNPEff project page + *

* * @author David Roazen + * @since 2011 */ public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java index 6fe1907e3..fdcc8ed10 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/BedTableCodec.java @@ -6,14 +6,19 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import java.util.Arrays; /** - * Created by IntelliJ IDEA. - * User: chartl - * Date: 3/28/11 - * Time: 2:47 PM - * To change this template use File | Settings | File Templates. - */ -/** - * The standard table codec with a slightly different parsing convention (expects loci as contig start stop, not contig:start-stop) + * The standard table codec that expects loci as contig start stop, not contig:start-stop + * + *

+ * The standard table codec with a slightly different parsing convention + * (expects loci as contig start stop, not contig:start-stop) + *

+ * + *

+ * See also: TableCodec + *

+ * + * @author Chris Hartl + * @since 2010 */ public class BedTableCodec extends TableCodec implements ReferenceDependentFeatureCodec { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java index 2ce7c679e..1919ccbf0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/table/TableCodec.java @@ -11,13 +11,40 @@ import java.util.ArrayList; import java.util.Arrays; /** - * implementation of a simple table (tab or comma delimited format) input files + * Reads tab-delimited tabular text files + * + *

+ *

    + *
  • Header: must begin with line HEADER or track (for IGV), followed by any number of column names, + * separated by whitespace.
  • + *
  • Comment lines starting with # are ignored
  • + *
  • Each non-header and non-comment line is split into parts by whitespace, + * and these parts are assigned as a map to their corresponding column name in the header. + * Note that the first element (corresponding to the HEADER column) must be a valid genome loc + * such as 1, 1:1 or 1:1-10, which is the position of the Table element on the genome. TableCodec + * requires that there be one value for each column in the header, and no more, on all lines.
  • + *
+ *

+ * + *

+ * + *

File format example

+ *
+ *     HEADER a b c
+ *     1:1  1   2   3
+ *     1:2  4   5   6
+ *     1:3  7   8   9
+ * 
+ * + * @author Mark DePristo + * @since 2009 */ public class TableCodec implements ReferenceDependentFeatureCodec { - protected String delimiterRegex = "\\s+"; - protected String headerDelimiter = "HEADER"; - protected String igvHeaderDelimiter = "track"; - protected String commentDelimiter = "#"; + final static protected String delimiterRegex = "\\s+"; + final static protected String headerDelimiter = "HEADER"; + final static protected String igvHeaderDelimiter = "track"; + final static protected String commentDelimiter = "#"; + protected ArrayList header = new ArrayList(); /** diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 19f58ddaa..46242c302 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -14,10 +14,9 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; +import java.io.*; import java.util.*; +import java.util.zip.GZIPInputStream; public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, VCFParser, SelfScopingFeatureCodec { @@ -623,9 +622,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) { try { - char[] buff = new char[MAGIC_HEADER_LINE.length()]; - new FileReader(potentialInput).read(buff, 0, MAGIC_HEADER_LINE.length()); + return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || + isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); + } catch ( FileNotFoundException 
e ) { + return false; + } catch ( IOException e ) { + return false; + } + } + + private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) { + try { + byte[] buff = new byte[MAGIC_HEADER_LINE.length()]; + stream.read(buff, 0, MAGIC_HEADER_LINE.length()); String firstLine = new String(buff); + stream.close(); return firstLine.startsWith(MAGIC_HEADER_LINE); } catch ( IOException e ) { return false; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index ea16595bb..e5b1a2de5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -14,8 +14,20 @@ import java.util.*; /** - * a feature codec for the VCF 3 specification. Our aim is to read in the records and convert to VariantContext as - * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. + * A feature codec for the VCF3 specification, to read older VCF files. VCF3 has been + * deprecated in favor of VCF4 (See VCF codec for the latest information) + * + *

+ * Reads historical VCF3 encoded files (1000 Genomes Pilot results, for example) + *

+ * + *

+ * See also: @see VCF specification
+ * See also: @see VCF spec. publication + *

+ * + * @author Mark DePristo + * @since 2010 */ public class VCF3Codec extends AbstractVCFCodec { public final static String VCF3_MAGIC_HEADER = "##fileformat=VCFv3"; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 55a0eb3f9..fa030ef5f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -12,12 +12,46 @@ import java.io.FileReader; import java.io.IOException; import java.util.*; - /** - * a feature codec for the VCF 4 specification. Our aim is to read in the records and convert to VariantContext as - * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. + * A feature codec for the VCF 4 specification + * + *

+ * VCF is a text file format (most likely stored in a compressed manner). It contains meta-information lines, a + * header line, and then data lines each containing information about a position in the genome. + *

+ *

One of the main uses of next-generation sequencing is to discover variation amongst large populations + * of related samples. Recently the format for storing next-generation read alignments has been + * standardised by the SAM/BAM file format specification. This has significantly improved the + * interoperability of next-generation tools for alignment, visualisation, and variant calling. + * We propose the Variant Call Format (VCF) as a standardised format for storing the most prevalent + * types of sequence variation, including SNPs, indels and larger structural variants, together + * with rich annotations. VCF is usually stored in a compressed manner and can be indexed for + * fast data retrieval of variants from a range of positions on the reference genome. + * The format was developed for the 1000 Genomes Project, and has also been adopted by other projects + * such as UK10K, dbSNP, or the NHLBI Exome Project. VCFtools is a software suite that implements + * various utilities for processing VCF files, including validation, merging and comparing, + * and also provides a general Perl and Python API. + * The VCF specification and VCFtools are available from http://vcftools.sourceforge.net.

+ * + *

+ * See also: @see VCF specification
+ * See also: @see VCF spec. publication + *

+ * + *

File format example

+ *
+ *     ##fileformat=VCFv4.0
+ *     #CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  NA12878
+ *     chr1    109     .       A       T       0       PASS  AC=1    GT:AD:DP:GL:GQ  0/1:610,327:308:-316.30,-95.47,-803.03:99
+ *     chr1    147     .       C       A       0       PASS  AC=1    GT:AD:DP:GL:GQ  0/1:294,49:118:-57.87,-34.96,-338.46:99
+ * 
+ * + * @author Mark DePristo + * @since 2010 */ public class VCFCodec extends AbstractVCFCodec { + // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. + public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; /** From 0f25167efd3b765c8a40a8f6b90777e5a2eb4874 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Fri, 19 Aug 2011 11:01:04 -0400 Subject: [PATCH 02/11] minor fix in VariantEval docs --- .../sting/gatk/walkers/varianteval/VariantEvalWalker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index d1fa3f4df..f6d42afb1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -95,7 +95,7 @@ public class VariantEvalWalker extends RodWalker implements Tr protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); // Help arguments - @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit") + @Argument(fullName="list", shortName="ls", doc="List the available eval modules and exit", required=false) protected Boolean LIST = false; // Partitioning the data arguments From 4d1fd17a97aa49e48c6163b84ff3ff15725bea66 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 13:13:41 -0400 Subject: [PATCH 03/11] GATKDoclet cleanup and documentation -- Fixed bug in the way ArgumentCollections were handled that lead to failure in handling the dbsnp argument collection. 
--- .../sting/utils/help/GATKDoclet.java | 26 +- .../help/GenericDocumentationHandler.java | 263 +++++++++++------- 2 files changed, 189 insertions(+), 100 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index 5755d2b37..de6ad359e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -34,7 +34,10 @@ import org.apache.commons.io.FileUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.broad.tribble.FeatureCodec; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.walkers.qc.DocumentationTest; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.io.*; import java.util.*; @@ -48,6 +51,7 @@ public class GATKDoclet { final protected static Logger logger = Logger.getLogger(GATKDoclet.class); protected static String buildTimestamp = null, absoluteVersion = null; protected static boolean showHiddenFeatures = false; + protected static boolean testOnly = false; RootDoc rootDoc; @@ -75,6 +79,8 @@ public class GATKDoclet { absoluteVersion = options[1]; if (options[0].equals("-include-hidden")) showHiddenFeatures = true; + if (options[0].equals("-test")) + testOnly = true; } GATKDoclet doclet = new GATKDoclet(); @@ -88,16 +94,26 @@ public class GATKDoclet { * @return Number of potential parameters; 0 if not supported. 
*/ public static int optionLength(String option) { - if(option.equals("-build-timestamp") || option.equals("-absolute-version") || option.equals("-include-hidden")) { + if(option.equals("-build-timestamp") || + option.equals("-absolute-version") || + option.equals("-include-hidden")) { return 2; - } - return 0; + } else if ( option.equals("-test") ) + return 1; + else + return 0; } public boolean showHiddenFeatures() { return showHiddenFeatures; } + public static boolean testOnly() { + return testOnly; + } + + private static final List> testOnlyKeepers = Arrays.asList( + DocumentationTest.class, CommandLineGATK.class, UserException.class); public Set workUnits() { TreeSet m = new TreeSet(); @@ -105,6 +121,10 @@ public class GATKDoclet { //logger.debug("Considering " + doc); Class clazz = getClassForClassDoc(doc); + // don't add anything that's not DocumentationTest if we are in test mode + if ( clazz != null && testOnly && ! testOnlyKeepers.contains(clazz) ) + continue; + //if ( clazz != null && clazz.getName().equals("org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance")) // logger.debug("foo"); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index d7add9af0..08e430c8a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.help; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import com.sun.javadoc.ClassDoc; import com.sun.javadoc.FieldDoc; @@ -31,8 +32,10 @@ import com.sun.javadoc.RootDoc; import com.sun.javadoc.Tag; import org.apache.log4j.Logger; import org.broad.tribble.Feature; +import org.broad.tribble.bed.FullBEDFeature; import org.broadinstitute.sting.commandline.*; import 
org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -49,14 +52,18 @@ import java.util.*; */ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { private static Logger logger = Logger.getLogger(GenericDocumentationHandler.class); - GATKDocWorkUnit toProcess; - ClassDoc classdoc; - Set all; - RootDoc rootDoc; + + /** The Class we are documenting */ + private GATKDocWorkUnit toProcess; + + /** The set of all classes we are documenting, for cross-referencing */ + private Set all; + + /** The JavaDoc root */ + private RootDoc rootDoc; @Override public boolean includeInDocs(ClassDoc doc) { -// return true; try { Class type = HelpUtils.getClassForDoc(doc); return JVMUtils.isConcrete(type); @@ -76,7 +83,6 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { this.rootDoc = rootDoc; this.toProcess = toProcessArg; this.all = allArg; - this.classdoc = toProcess.classDoc; //System.out.printf("%s class %s%n", toProcess.group, toProcess.classDoc); Map root = new HashMap(); @@ -88,71 +94,76 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { toProcess.setHandlerContent((String)root.get("summary"), root); } + /** + * Add high-level summary information about toProcess to root, such as its + * name, summary, description, version, etc. + * + * @param root + */ protected void addHighLevelBindings(Map root) { - root.put("name", classdoc.name()); + root.put("name", toProcess.classDoc.name()); // Extract overrides from the doc tags. 
StringBuilder summaryBuilder = new StringBuilder(); - for(Tag tag: classdoc.firstSentenceTags()) + for(Tag tag: toProcess.classDoc.firstSentenceTags()) summaryBuilder.append(tag.text()); root.put("summary", summaryBuilder.toString()); - root.put("description", classdoc.commentText().substring(summaryBuilder.toString().length())); + root.put("description", toProcess.classDoc.commentText().substring(summaryBuilder.toString().length())); root.put("timestamp", toProcess.buildTimestamp); root.put("version", toProcess.absoluteVersion); - for(Tag tag: classdoc.tags()) { + for(Tag tag: toProcess.classDoc.tags()) { root.put(tag.name(), tag.text()); } } + /** + * Add bindings describing related GATK capabilites to toProcess + * @param root + */ + protected void addRelatedBindings(Map root) { + List> extraDocsData = new ArrayList>(); + + // add in all of the explicitly related items + for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) { + final GATKDocWorkUnit otherUnit = GATKDoclet.findWorkUnitForClass(extraDocClass, all); + if ( otherUnit == null ) + throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass); + extraDocsData.add( + new HashMap(){{ + put("filename", otherUnit.filename); + put("name", otherUnit.name);}}); + + } + root.put("extradocs", extraDocsData); + } + + /** + * Add information about all of the arguments available to toProcess to root + * + * @param root + */ protected void addArgumentBindings(Map root) { ParsingEngine parsingEngine = createStandardGATKParsingEngine(); - // attempt to instantiate the class - Object instance = makeInstanceIfPossible(toProcess.clazz); - - Map>> args = new HashMap>>(); + Map>> args = createArgumentMap(); root.put("arguments", args); - args.put("all", new ArrayList>()); - args.put("required", new ArrayList>()); - args.put("optional", new ArrayList>()); - args.put("advanced", new ArrayList>()); - args.put("hidden", new ArrayList>()); - args.put("depreciated", 
new ArrayList>()); try { - for ( ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(classdoc)) ) { + // loop over all of the arguments according to the parsing engine + for ( final ArgumentSource argumentSource : parsingEngine.extractArgumentSources(HelpUtils.getClassForDoc(toProcess.classDoc)) ) { + // todo -- why can you have multiple ones? ArgumentDefinition argDef = argumentSource.createArgumentDefinitions().get(0); - FieldDoc fieldDoc = getFieldDoc(classdoc, argumentSource.field.getName()); - Map argBindings = docForArgument(fieldDoc, argumentSource, argDef); // todo -- why can you have multiple ones? + FieldDoc fieldDoc = getFieldDoc(toProcess.classDoc, argumentSource.field.getName()); + Map argBindings = docForArgument(fieldDoc, argumentSource, argDef); if ( ! argumentSource.isHidden() || getDoclet().showHiddenFeatures() ) { - logger.debug(String.format("Processing %s", argumentSource)); - String kind = "optional"; - if ( argumentSource.isRequired() ) kind = "required"; - else if ( argumentSource.isAdvanced() ) kind = "advanced"; - else if ( argumentSource.isHidden() ) kind = "hidden"; - else if ( argumentSource.isDeprecated() ) kind = "depreciated"; + final String kind = docKindOfArg(argumentSource); - // get the value of the field - if ( instance != null ) { - Object value = getFieldValue(toProcess.clazz, instance, fieldDoc.name()); - - if ( value == null && argumentSource.createsTypeDefault() ) { - // handle the case where there's an implicit default - try { - value = argumentSource.typeDefaultDocString(); - } catch (ReviewedStingException e) { - ; // failed to create type default, don't worry about it - } - } - - if ( value != null ) - argBindings.put("defaultValue", prettyPrintValueString(value)); - } + final Object value = argumentValue(toProcess.clazz, argumentSource); + if ( value != null ) + argBindings.put("defaultValue", prettyPrintValueString(value)); args.get(kind).add(argBindings); 
args.get("all").add(argBindings); - } else { - logger.debug(String.format("Skipping hidden feature %s", argumentSource)); } } @@ -165,11 +176,78 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } } + /** + * Return the argument kind (required, advanced, hidden, etc) of this argumentSource + * @param argumentSource + * @return + */ + @Requires("argumentSource != null") + @Ensures("result != null") + private String docKindOfArg(ArgumentSource argumentSource) { + if ( argumentSource.isRequired() ) return "required"; + else if ( argumentSource.isAdvanced() ) return "advanced"; + else if ( argumentSource.isHidden() ) return "hidden"; + else if ( argumentSource.isDeprecated() ) return "depreciated"; + else return "optional"; + } + + /** + * Attempts to determine the value of argumentSource in an instantiated version of c + * @param c + * @param argumentSource + * @return value of argumentSource, or null if this isn't possible + */ + @Requires({"c != null", "argumentSource != null"}) + private Object argumentValue(Class c, ArgumentSource argumentSource) { + // get the value of the field + // attempt to instantiate the class + final Object instance = makeInstanceIfPossible(toProcess.clazz); + if ( instance != null ) { + final Object value = getFieldValue(instance, argumentSource.field.getName()); + if ( value != null ) + return value; + + if ( argumentSource.createsTypeDefault() ) { + try { // handle the case where there's an implicit default + return argumentSource.typeDefaultDocString(); + } catch (ReviewedStingException e) { + ; // failed to create type default, don't worry about it + } + } + } + + return null; + } + + /** + * Create the argument map for holding class arguments + * @return + */ + private Map>> createArgumentMap() { + Map>> args = new HashMap>>(); + args.put("all", new ArrayList>()); + args.put("required", new ArrayList>()); + args.put("optional", new ArrayList>()); + args.put("advanced", new ArrayList>()); + 
args.put("hidden", new ArrayList>()); + args.put("depreciated", new ArrayList>()); + return args; + } + + + /** + * Sorts the individual argument list in unsorted according to CompareArgumentsByName + * @param unsorted + * @return + */ private List> sortArguments(List> unsorted) { Collections.sort(unsorted, new CompareArgumentsByName()); return unsorted; } + /** + * Sort arguments by case-insensitive comparison ignoring the -- and - prefixes + */ private class CompareArgumentsByName implements Comparator> { public int compare(Map x, Map y) { return elt(x).compareTo(elt(y)); @@ -186,25 +264,32 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } } - private Object getFieldValue(Class c, Object instance, String fieldName) { - Field field = JVMUtils.findField(c, fieldName); - if ( field != null ) { - Object value = JVMUtils.getFieldValue(field, instance); - //System.out.printf("Fetched value of field %s in class %s: %s%n", fieldName, c, value); - return value; - } else { - return findFieldValueInArgumentCollections(c, instance, fieldName); - } - } - - private Object findFieldValueInArgumentCollections(Class c, Object instance, String fieldName) { - for ( Field field : JVMUtils.getAllFields(c) ) { + /** + * Utility function that finds the value of fieldName in any fields of ArgumentCollection fields in + * instance of class c. + * + * @param instance the object to query for the field value + * @param fieldName the name of the field we are looking for in instance + * @return The value assigned to field in the ArgumentCollection, otherwise null + */ + private Object getFieldValue(Object instance, String fieldName) { + // + // subtle note. If you have a field named X that is an ArgumentCollection that + // contains a field X as well, you need only consider fields in the argumentCollection, not + // matching the argument itself. 
+ // + // @ArgumentCollection + // protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + // + for ( Field field : JVMUtils.getAllFields(instance.getClass()) ) { if ( field.isAnnotationPresent(ArgumentCollection.class) ) { //System.out.printf("Searching for %s in argument collection field %s%n", fieldName, field); Object fieldValue = JVMUtils.getFieldValue(field, instance); - Object value = getFieldValue(fieldValue.getClass(), fieldValue, fieldName); + Object value = getFieldValue(fieldValue, fieldName); if ( value != null ) return value; + } else if ( field.getName().equals(fieldName) ) { + return JVMUtils.getFieldValue(field, instance); } } @@ -212,6 +297,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } /** + * Pretty prints value + * * Assumes value != null * @param value * @return @@ -246,6 +333,11 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return value.toString(); } + /** + * Attempt to instantiate class c, if possible. Returns null if this proves impossible. 
+ * @param c + * @return + */ private Object makeInstanceIfPossible(Class c) { Object instance = null; try { @@ -265,47 +357,16 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // this last one is super dangerous, but some of these methods catch ClassNotFoundExceptions // and rethrow then as RuntimeExceptions catch (RuntimeException e) {} -// finally { -// if ( instance == null ) -// logger.warn(String.format("Unable to create instance of class %s => %s", c, instance)); -// } return instance; } - protected void addRelatedBindings(Map root) { - List> extraDocsData = new ArrayList>(); - // add in all of the explicitly related items - for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) { - final GATKDocWorkUnit otherUnit = GATKDoclet.findWorkUnitForClass(extraDocClass, all); - if ( otherUnit == null ) - throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass); - extraDocsData.add( - new HashMap(){{ - put("filename", otherUnit.filename); - put("name", otherUnit.name);}}); - - } - root.put("extradocs", extraDocsData); - } - - private static final String classRelationship(Class me, Class other) { - if ( other.equals(me) ) - // no circular references - return null; - else if ( other.isAssignableFrom(me) ) - // toProcess is a superclass of other.clazz - return "superclass"; - else if ( me.isAssignableFrom(other) ) - // toProcess inherits from other.clazz - return "subclass"; - else - return null; - - } - - protected ParsingEngine createStandardGATKParsingEngine() { + /** + * Create an instance of the GATK parsing engine, for argument processing with GATKDoclet + * @return + */ + private ParsingEngine createStandardGATKParsingEngine() { CommandLineProgram clp = new CommandLineGATK(); try { CommandLineProgram.start(clp, new String[]{}, true); @@ -315,6 +376,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } } + /** + * Gets the javadocs 
associated with field name in classDoc. Throws a + * runtime exception if this proves impossible. + * + * @param classDoc + * @param name + * @return + */ private FieldDoc getFieldDoc(ClassDoc classDoc, String name) { return getFieldDoc(classDoc, name, true); } From 7b5fa4486d27cca06cd3e5b07171d6ea99c3ac38 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 19 Aug 2011 13:34:21 -0400 Subject: [PATCH 05/11] GenotypeAndValidate - Added docs to the @Arguments --- .../validation/GenotypeAndValidateWalker.java | 33 ++++++++++++++++--- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java index fc23200af..2b38afaf6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidateWalker.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.validation; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -201,30 +198,58 @@ import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; public class GenotypeAndValidateWalker extends RodWalker implements TreeReducible { + /** + * The optional output file that will have all the variants used in the Genotype and Validation essay. 
+ */ @Output(doc="Generate a VCF file with the variants considered by the walker, with a new annotation \"callStatus\" which will carry the value called in the validation VCF or BAM file", required=false) protected VCFWriter vcfWriter = null; + /** + * The callset to be used as truth (default) or validated (if BAM file is set to truth). + */ @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype", required=true) public RodBinding alleles; + /** + * Makes the Unified Genotyper calls to the BAM file the truth dataset and validates the alleles ROD binding callset. + */ @Argument(fullName ="set_bam_truth", shortName ="bt", doc="Use the calls on the reads (bam file) as the truth dataset and validate the calls on the VCF", required=false) private boolean bamIsTruth = false; + /** + * The minimum base quality score necessary for a base to be considered when calling a genotype. This argument is passed to the Unified Genotyper. + */ @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false) private int mbq = -1; + /** + * The maximum deletion fraction allowed in a site for calling a genotype. This argument is passed to the Unified Genotyper. + */ @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false) private double deletions = -1; + /** + * the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. This argument is passed to the Unified Genotyper. + */ @Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false) private double callConf = -1; + /** + * the minimum phred-scaled Qscore threshold to emit low confidence calls. This argument is passed to the Unified Genotyper. 
+ */ @Argument(fullName="standard_min_confidence_threshold_for_emitting", shortName="stand_emit_conf", doc="the minimum phred-scaled Qscore threshold to emit low confidence calls", required=false) private double emitConf = -1; + /** + * Only validate sites that have at least a given depth + */ @Argument(fullName="condition_on_depth", shortName="depth", doc="Condition validation on a minimum depth of coverage by the reads", required=false) private int minDepth = -1; + /** + * If your VCF or BAM file has more than one sample and you only want to validate one, use this parameter to choose it. + */ + @Hidden @Argument(fullName ="sample", shortName ="sn", doc="Name of the sample to validate (in case your VCF/BAM has more than one sample)", required=false) private String sample = ""; From 49e831a13b51dc37877138dfca4e0cd178f4f4e5 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 14:35:16 -0400 Subject: [PATCH 07/11] Should have checked in --- .../sting/gatk/walkers/ClipReadsWalker.java | 60 ++++++++- .../gatk/walkers/qc/DocumentationTest.java | 115 ++++++++++++++++++ 2 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 76b0276cd..68afed296 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -51,8 +51,66 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads + * This tool provides simple, powerful read clipping capabilities. + * + *

+ * It allows the user to clip bases in reads * with poor quality scores, that match particular sequences, or that were generated by particular machine cycles. + *

+ * + *

Input

+ *

+ * A BAM file containing. + *

+ * + *

Output

+ *

+ *

    + *
  • -o: a OutputFormatted (recommended BED) file with the callable status covering each base
  • + *
  • -summary: a table of callable status x count of all examined bases
  • + *
+ *

+ * + *

Examples

+ *
+ *     -T CallableLociWalker \
+ *     -I my.bam \
+ *     -summary my.summary \
+ *     -o my.bed
+ * 
+ * + * would produce a BED file (my.bed) that looks like: + * + *
+ *     20 10000000 10000864 CALLABLE
+ *     20 10000865 10000985 POOR_MAPPING_QUALITY
+ *     20 10000986 10001138 CALLABLE
+ *     20 10001139 10001254 POOR_MAPPING_QUALITY
+ *     20 10001255 10012255 CALLABLE
+ *     20 10012256 10012259 POOR_MAPPING_QUALITY
+ *     20 10012260 10012263 CALLABLE
+ *     20 10012264 10012328 POOR_MAPPING_QUALITY
+ *     20 10012329 10012550 CALLABLE
+ *     20 10012551 10012551 LOW_COVERAGE
+ *     20 10012552 10012554 CALLABLE
+ *     20 10012555 10012557 LOW_COVERAGE
+ *     20 10012558 10012558 CALLABLE
+ *     et cetera...
+ * 
+ * as well as a summary table that looks like: + * + *
+ *                        state nBases
+ *                        REF_N 0
+ *                     CALLABLE 996046
+ *                  NO_COVERAGE 121
+ *                 LOW_COVERAGE 928
+ *           EXCESSIVE_COVERAGE 0
+ *         POOR_MAPPING_QUALITY 2906
+ * 
+ * + * @author Mark DePristo + * @since May 7, 2010 */ @Requires({DataSource.READS}) public class ClipReadsWalker extends ReadWalker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java new file mode 100644 index 000000000..933e24784 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; + +/** + * Summary test + * + *

Body test

+ */ +public class DocumentationTest extends RodWalker { + // the docs for the arguments are in the collection + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + + /** + * dbSNP comparison VCF. By default, the dbSNP file is used to specify the set of "known" variants. + * Other sets can be specified with the -knownName (--known_names) argument. + */ + @ArgumentCollection + protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); + + /** + * detailed documentation about the argument goes here. + */ + @Input(fullName="listofRodBinding", shortName = "disc", doc="Output variants that were not called in this Feature comparison track", required=false) + private List> listOfRodBinding = Collections.emptyList(); + + @Input(fullName="optionalRodBinding", shortName = "conc", doc="Output variants that were also called in this Feature comparison track", required=false) + private RodBinding concordanceTrack; + + @Input(fullName="optionalRodBindingWithoutDefault", shortName = "optionalRodBindingWithoutDefault", doc="Output variants that were also called in this Feature comparison track", required=false) + private RodBinding noDefaultOptionalRodBinding; + + @Input(fullName="optionalRodBindingWithoutDefaultNull", shortName = "shortTest", doc="Output variants that were also called in this Feature comparison track", required=false) + private RodBinding noDefaultOptionalRodBindingNull = null; + + @Input(fullName="featureArg", shortName = "featureArg", doc="A RodBinding of feature", required=false) + private RodBinding featureArg = null; + + @Output(doc="VCFWriter",required=true) + protected VCFWriter vcfWriter = null; + + @Advanced + @Argument(fullName="setString", shortName="sn", doc="Sample name to be included in the analysis. 
Can be specified multiple times.", required=false) + public Set sampleNames; + + @Argument(fullName="setStringInitialized", shortName="setStringInitialized", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) + public Set setStringInitialized = new HashSet(); + + @Argument(shortName="optionalArgWithMissinglessDefault", doc="One or more criteria to use when selecting the data. Evaluated *after* the specified samples are extracted and the INFO-field annotations are updated.", required=false) + public ArrayList SELECT_EXPRESSIONS = new ArrayList(); + + @Argument(shortName="AAAAA", fullName = "AAAAA", doc="Should be the first argument", required=false) + public boolean FIRST_ARG = false; + + @Advanced + @Argument(fullName="booleanArg", shortName="env", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) + private boolean EXCLUDE_NON_VARIANTS = false; + + @Advanced + @Argument(fullName="booleanArray", shortName="booleanArray", doc="x", required=false) + private boolean[] boolArray = null; + + @Argument(fullName="enumTest", shortName="enumTest", doc="Test enum", required=false) + private TestEnum TestEnumArg = TestEnum.ENUM2; + public enum TestEnum { + /** Docs for enum1 */ + ENUM1, + /** Docs for enum2 */ + ENUM2 + } + + @Hidden + @Argument(fullName="hiddenArg", shortName="keepAF", doc="Don't include loci found to be non-variant after the subsetting procedure.", required=false) + private boolean KEEP_AF_SPECTRUM = false; + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; } + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { return value + sum; } + public void onTraversalDone(Integer result) { } +} From b08d63a6b8dc4e7b8f21c528f234d34f81ce654a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 15:06:37 -0400 Subject: [PATCH 08/11] Documentation and code 
cleanup for ClipReads, CallableLoci, and VariantsToTable -- Swapped -o [summary] and -ob [bam] for more standard -o [bam] and -os [summary] arguments. -- @Advanced arguments --- .../sting/gatk/walkers/ClipReadsWalker.java | 181 ++++++++++++------ .../walkers/coverage/CallableLociWalker.java | 4 + .../walkers/variantutils/VariantsToTable.java | 4 + .../clipreads/ClippingRepresentation.java | 29 ++- .../ClipReadsWalkersIntegrationTest.java | 6 +- 5 files changed, 154 insertions(+), 70 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index 68afed296..bb65d9b09 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -30,7 +30,9 @@ import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.samtools.SAMRecord; import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; @@ -42,7 +44,6 @@ import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.yaml.snakeyaml.events.SequenceStartEvent; import java.io.File; import java.io.PrintStream; @@ -51,102 +52,158 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * This tool provides simple, powerful read clipping capabilities. 
+ * This tool provides simple, powerful read clipping capabilities to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences. + * * *

- * It allows the user to clip bases in reads - * with poor quality scores, that match particular sequences, or that were generated by particular machine cycles. + * It allows the user to clip bases in reads with poor quality scores, that match particular + * sequences, or that were generated by particular machine cycles. + * + *

+ *
Quality score based clipping
+ *
+ * Clip bases from the read in clipper from + *
argmax_x{ \sum_{i = x + 1}^{l} (qTrimmingThreshold - qual_i) }
+ * to the end of the read. This is blatantly stolen from BWA. + * + * Walk through the read from the end (in machine cycle order) to the beginning, calculating the + * running sum of qTrimmingThreshold - qual. While we do this, we track the maximum value of this + * sum where the delta > 0. After the loop, clipPoint is either -1 (don't do anything) or the + * clipping index in the read (from the end). + *
+ *
Cycle based clipping
+ *
Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc. + * For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based values (positions). + * For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, and 12. + *
+ *
Sequence matching
+ *
Clips bases from the reads that exactly match one of a number of base sequences. This employs an exact match algorithm, + * filtering only bases whose sequence exactly matches SEQ.
+ *
+ * *

* *

Input

*

- * A BAM file containing. + * Any number of BAM files. *

* *

Output

*

- *

    - *
  • -o: a OutputFormatted (recommended BED) file with the callable status covering each base
  • - *
  • -summary: a table of callable status x count of all examined bases
  • - *
+ * A new BAM file containing all of the reads from the input BAMs with the user-specified clipping + * operation applied to each read. + *

+ *

+ *

Summary output

+ *
+ *     Number of examined reads              13
+ *     Number of clipped reads               13
+ *     Percent of clipped reads              100.00
+ *     Number of examined bases              988
+ *     Number of clipped bases               126
+ *     Percent of clipped bases              12.75
+ *     Number of quality-score clipped bases 126
+ *     Number of range clipped bases         0
+ *     Number of sequence clipped bases      0
+ *     
+ *

+ * + *

+ *

Example clipping

+ * Suppose we are given this read: + *
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3116    29      76M     *       *       *
+ *          TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     
+ * + * If we are clipping reads with -QT 10 and -CR WRITE_NS, we get: + * + *
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3116    29      76M     *       *       *
+ *          NNNNNNNNNNNNNNNNNTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     
+ * + * Whereas with -CR WRITE_Q0S: + *
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3116    29      76M     *       *       *
+ *          TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          !!!!!!!!!!!!!!!!!4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     
+ * + * Or -CR SOFTCLIP_BASES: + *
+ *     314KGAAXX090507:1:19:1420:1123#0        16      chrM    3133    29      17S59M  *       *       *
+ *          TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
+ *          #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
+ *     
*

* *

Examples

*
- *     -T CallableLociWalker \
- *     -I my.bam \
- *     -summary my.summary \
- *     -o my.bed
+ *     -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
+ *     -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
  * 
- * - * would produce a BED file (my.bed) that looks like: - * - *
- *     20 10000000 10000864 CALLABLE
- *     20 10000865 10000985 POOR_MAPPING_QUALITY
- *     20 10000986 10001138 CALLABLE
- *     20 10001139 10001254 POOR_MAPPING_QUALITY
- *     20 10001255 10012255 CALLABLE
- *     20 10012256 10012259 POOR_MAPPING_QUALITY
- *     20 10012260 10012263 CALLABLE
- *     20 10012264 10012328 POOR_MAPPING_QUALITY
- *     20 10012329 10012550 CALLABLE
- *     20 10012551 10012551 LOW_COVERAGE
- *     20 10012552 10012554 CALLABLE
- *     20 10012555 10012557 LOW_COVERAGE
- *     20 10012558 10012558 CALLABLE
- *     et cetera...
- * 
- * as well as a summary table that looks like: - * - *
- *                        state nBases
- *                        REF_N 0
- *                     CALLABLE 996046
- *                  NO_COVERAGE 121
- *                 LOW_COVERAGE 928
- *           EXCESSIVE_COVERAGE 0
- *         POOR_MAPPING_QUALITY 2906
- * 
- * + * @author Mark DePristo - * @since May 7, 2010 + * @since 2010 */ @Requires({DataSource.READS}) public class ClipReadsWalker extends ReadWalker { - @Output - PrintStream out; + /** + * If provided, ClipReads will write summary statistics about the clipping operations applied + * to the reads to this file. + */ + @Output(fullName = "outputStatistics", shortName = "os", doc = "Write output statistics to this file", required = false) + PrintStream out = null; /** - * an optional argument to dump the reads out to a BAM file + * The output SAM/BAM file will be written here */ - @Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false) - StingSAMFileWriter outputBam = null; + @Output(doc = "Write BAM output here", required = true) + StingSAMFileWriter outputBam; - @Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false) + /** + * If a value > 0 is provided, then the quality score based read clipper will be applied to the reads using this + * quality score threshold. + */ + @Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "If provided, the Q-score clipper will be applied", required = false) int qTrimmingThreshold = -1; - @Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false) + /** + * Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc. + * For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based + * values (positions). For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, + * and 12. 
+ */ + @Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String indicating machine cycles to clip from the reads", required = false) String cyclesToClipArg = null; - @Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false) + /** + * Reads the sequences in the provided FASTA file, and clip any bases that exactly match any of the + * sequences in the file. + */ + @Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching the sequences in this FASTA file", required = false) String clipSequenceFile = null; + /** + * Clips bases from the reads matching the provided SEQ. Can be provided any number of times on the command line + */ @Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false) String[] clipSequencesArgs = null; - @Argument(fullName="read", doc="", required=false) - String onlyDoRead = null; - - //@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false) - //boolean keepCompletelyClippedReads = false; - -// @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false) -// boolean onlyClipFirstSeqMatch = false; - + /** + * The different values for this argument determines how ClipReads applies clips to the reads. This can range + * from writing Ns over the clipped bases to hard clipping away the bases from the BAM. 
+ */ @Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false) ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS; + @Hidden + @Advanced + @Argument(fullName="read", doc="", required=false) + String onlyDoRead = null; /** * List of sequence that should be clipped from the reads diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java index 98331ec1d..32875a098 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalker.java @@ -22,6 +22,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage; +import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -154,6 +155,7 @@ public class CallableLociWalker extends LocusWalker minMappingQuality and with base quality > minBaseQuality) exceeds this * value and is less than maxDepth the site is considered CALLABLE. */ + @Advanced @Argument(fullName = "minDepth", shortName = "minDepth", doc = "Minimum QC+ read depth before a locus is considered callable", required = false) int minDepth = 4; @@ -168,6 +170,7 @@ public class CallableLociWalker extends LocusWalker { * By default this tool only emits values for fields where the FILTER field is either PASS or . (unfiltered). * Throwing this flag will cause $WalkerName to emit values regardless of the FILTER field value. 
*/ + @Advanced @Argument(fullName="showFiltered", shortName="raw", doc="If provided, field values from filtered records will be included in the output", required=false) public boolean showFiltered = false; /** * If provided, then this tool will exit with success after this number of records have been emitted to the file. */ + @Advanced @Argument(fullName="maxRecords", shortName="M", doc="If provided, we will emit at most maxRecord records to the table", required=false) public int MAX_RECORDS = -1; int nRecords = 0; @@ -121,6 +123,7 @@ public class VariantsToTable extends RodWalker { * can make your resulting file unreadable and malformated according to tools like R, as the representation of * multi-allelic INFO field values can be lists of values. */ + @Advanced @Argument(fullName="keepMultiAllelic", shortName="KMA", doc="If provided, we will not require the site to be biallelic", required=false) public boolean keepMultiAllelic = false; @@ -131,6 +134,7 @@ public class VariantsToTable extends RodWalker { * fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument * will cause VariantsToTable to write out NA values for missing fields instead of throwing an error. */ + @Advanced @Argument(fullName="allowMissingData", shortName="AMD", doc="If provided, we will not require every record to contain every field", required=false) public boolean ALLOW_MISSING_DATA = false; diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java index 14c04b5c4..0dbe55726 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingRepresentation.java @@ -4,9 +4,28 @@ package org.broadinstitute.sting.utils.clipreads; * How should we represent a clipped bases in a read? 
*/ public enum ClippingRepresentation { - WRITE_NS, // change the bases to Ns - WRITE_Q0S, // change the quality scores to Q0 - WRITE_NS_Q0S, // change the quality scores to Q0 and write Ns - SOFTCLIP_BASES, // change cigar string to S, but keep bases - HARDCLIP_BASES // remove the bases from the read + /** Clipped bases are changed to Ns */ + WRITE_NS, + + /** Clipped bases are changed to have Q0 quality score */ + WRITE_Q0S, + + /** Clipped bases are changed to have both an N base and a Q0 quality score */ + WRITE_NS_Q0S, + + /** + * Change the read's cigar string to soft clip (S, see sam-spec) away the bases. + * Note that this can only be applied to cases where the clipped bases occur + * at the start or end of a read. + */ + SOFTCLIP_BASES, + + /** + * Change the read's cigar string to hard clip (H, see sam-spec) away the bases. + * Hard clipping, unlike soft clipping, actually removes bases from the read, + * reducing the resulting file's size but introducing an irreversible (i.e., + * lossy) operation. Note that this can only be applied to cases where the clipped + * bases occur at the start or end of a read. 
+ */ + HARDCLIP_BASES } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java index a129f8adf..ca3d1ee25 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java @@ -37,8 +37,8 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest { "-R " + hg18Reference + " -T ClipReads " + "-I " + validationDataLocation + "clippingReadsTest.bam " + - "-o %s " + - "-ob %s " + args, + "-os %s " + + "-o %s " + args, 2, // just one output file Arrays.asList("tmp", "bam"), Arrays.asList(md51, md52)); @@ -72,7 +72,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest { " -I " + validationDataLocation + "originalQuals.chr1.1-1K.bam" + " -L chr1:1-1,000" + " -OQ -QT 4 -CR WRITE_Q0S" + - " -o %s -ob %s", + " -o %s -os %s", 2, Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "22db22749f811d30216215e047461621")); executeTest("clipOriginalQuals", spec); From f39d0008bc5558c06786d2c3b8cbbaa537ca49b2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 15:07:26 -0400 Subject: [PATCH 09/11] Build.xml -- contracts not built by default. Slightly simpler CSS for dl. 
--- build.xml | 2 +- settings/helpTemplates/style.css | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build.xml b/build.xml index d8c38738a..ef53f6aa4 100644 --- a/build.xml +++ b/build.xml @@ -489,7 +489,7 @@ docletpathref="doclet.classpath" classpathref="external.dependencies" classpath="${java.classes}" - additionalparam="-private -build-timestamp "${build.timestamp}" -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> + additionalparam="-private -build-timestamp "${build.timestamp}" -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> diff --git a/settings/helpTemplates/style.css b/settings/helpTemplates/style.css index 375df2f51..297cd49ef 100644 --- a/settings/helpTemplates/style.css +++ b/settings/helpTemplates/style.css @@ -85,25 +85,33 @@ hr * enum DT layout */ +dl { + margin-left: 3em; +} + dl.enum { margin-left: 3em; border: 1px dashed #ccc; } -dt.enum { +dt, dt.enum { font-weight: bold; text-decoration: underline; } -dd.enum { +/* +dt, dd.enum { padding: 0 0 0.5em 0; } +*/ pre { border: thin solid lightgray; margin-left: 1em; margin-right: 4em; +/* background-color: #e0fdff; +*/ } /* * clean table layouts From 8b3cfb2f1c0f21699d4a4a3b1002fe1d612adf44 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 16:52:17 -0400 Subject: [PATCH 10/11] Final documented version of GATKDoclet and associated classes -- Docs on everything. -- Feature complete. 
At this point only minor improvements and bugfixes are anticipated --- .../utils/help/DocumentedGATKFeature.java | 4 + .../help/DocumentedGATKFeatureHandler.java | 4 +- .../help/DocumentedGATKFeatureObject.java | 5 +- .../sting/utils/help/GATKDocUtils.java | 19 ++ .../sting/utils/help/GATKDoclet.java | 206 +++++++++++++----- .../help/GenericDocumentationHandler.java | 81 +++++-- .../sting/utils/help/HelpUtils.java | 8 - 7 files changed, 244 insertions(+), 83 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java index 89163dfcb..5bbe3f91e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeature.java @@ -36,8 +36,12 @@ import java.lang.annotation.*; @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.TYPE) public @interface DocumentedGATKFeature { + /** Should we actually document this feature, even though it's annotated? */ public boolean enable() default true; + /** The overall group name (walkers, readfilters) this feature is associated with */ public String groupName(); + /** A human readable summary of the purpose of this group of features */ public String summary() default ""; + /** Are there links to other docs that we should include? CommandLineGATK.class for walkers, for example? 
*/ public Class[] extraDocs() default {}; } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java index ce03c8093..87926d2e3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureHandler.java @@ -92,9 +92,7 @@ public abstract class DocumentedGATKFeatureHandler { * * toProcess.setHandlerContent(summary, rootMap); * - * @param rootDoc * @param toProcess - * @param all */ - public abstract void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcess, Set all); + public abstract void processOne(GATKDocWorkUnit toProcess); } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java index 66354202f..6c8b0a475 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/DocumentedGATKFeatureObject.java @@ -25,12 +25,15 @@ package org.broadinstitute.sting.utils.help; /** - * Documentation unit. Effectively a class version of the DocumentedGATKFeature + * Documentation unit. Effectively a class version of the DocumentedGATKFeature. + * Immutable data structure. * * @author depristo */ class DocumentedGATKFeatureObject { + /** Which class are we documenting. Specific to each class being documented */ private final Class classToDoc; + /** Are we enabled? 
*/ private final boolean enable; private final String groupName, summary; private final Class[] extraDocs; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java index 983805c4d..cd645943b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDocUtils.java @@ -25,14 +25,33 @@ package org.broadinstitute.sting.utils.help; public class GATKDocUtils { + /** The URL root for RELEASED GATKDOC units */ public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gsa/gatkdocs/release/"; + /** The URL root for STABLE GATKDOC units */ public final static String URL_ROOT_FOR_STABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/stable/"; + /** The URL root for UNSTABLE GATKDOC units */ public final static String URL_ROOT_FOR_UNSTABLE_GATKDOCS = "http://iwww.broadinstitute.org/gsa/gatkdocs/unstable/"; + /** + * Return the filename of the GATKDoc HTML that would be generated for Class. This + * does not guarantee that the docs exist, or that docs would actually be generated + * for class (might not be annotated for documentation, for example). But if + * this class is documented, GATKDocs will write the docs to a file named as returned + * by this function. + * + * @param c + * @return + */ public static String htmlFilenameForClass(Class c) { return c.getName().replace(".", "_") + ".html"; } + /** + * Returns a full URL http://etc/ linking to the documentation for class (assuming it + * exists). Currently points to the RELEASE doc path only. 
+ * @param c + * @return + */ public static String helpLinksToGATKDocs(Class c) { String classPath = htmlFilenameForClass(c); StringBuilder b = new StringBuilder(); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java index de6ad359e..7f26f22f5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GATKDoclet.java @@ -43,18 +43,65 @@ import java.io.*; import java.util.*; /** + * Javadoc Doclet that combines javadoc, GATK ParsingEngine annotations, and FreeMarker + * templates to produce html formatted GATKDocs for walkers + * and other classes. * + * This document has the following workflow: + * + * 1 -- walk the javadoc heirarchy, looking for class that have the + * DocumentedGATKFeature annotation or are in the type heirarchy in the + * static list of things to document, and are to be documented + * 2 -- construct for each a GATKDocWorkUnit, resulting in the complete + * set of things to document + * 3 -- for each unit, actually generate an html page documenting it + * as well as links to related features via their units. Writing + * of a specific class HTML is accomplished by a generate DocumentationHandler + * 4 -- write out an index of all units, organized by group + * + * The documented classes are restricted to only those with @DocumentedGATKFeature + * annotation or are in the STATIC_DOCS class. 
*/ public class GATKDoclet { - final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); - final protected static File DESTINATION_DIR = new File("gatkdocs"); final protected static Logger logger = Logger.getLogger(GATKDoclet.class); + + /** Where we find the help FreeMarker templates */ + final protected static File SETTINGS_DIR = new File("settings/helpTemplates"); + + /** Where we write the GATKDoc html directory */ + final protected static File DESTINATION_DIR = new File("gatkdocs"); + + // ---------------------------------------------------------------------- + // + // Global variables that are set on the command line by javadoc + // + // ---------------------------------------------------------------------- protected static String buildTimestamp = null, absoluteVersion = null; protected static boolean showHiddenFeatures = false; + protected static boolean testOnly = false; + /** + * Any class that's in this list will be included in the documentation + * when the -test argument is provided. Useful for debugging. + */ + private static final List> testOnlyKeepers = Arrays.asList( + DocumentationTest.class, CommandLineGATK.class, UserException.class); + + /** The javadoc root doc */ RootDoc rootDoc; + /** The set of all things we are going to document */ + Set myWorkUnits; + + /** + * A static list of DocumentedGATKFeatureObjects. Any class that is as or extends + * one of the DocumentedGATKFeatureObjects.clazz of this collection will also + * be documented, even if it doesn't have the @DocumentedGATKFeature annotation. Useful + * when you want to document things that implement an interface (annotations on java + * interfaces aren't inherited) or whose base class isn't under your control (tribble + * codecs). 
+ */ final static Collection STATIC_DOCS = new ArrayList(); static { STATIC_DOCS.add(new DocumentedGATKFeatureObject(FeatureCodec.class, @@ -70,7 +117,8 @@ public class GATKDoclet { * @throws java.io.IOException if output can't be written. */ public static boolean start(RootDoc rootDoc) throws IOException { - logger.setLevel(Level.DEBUG); + logger.setLevel(Level.INFO); + // load arguments for(String[] options: rootDoc.options()) { if(options[0].equals("-build-timestamp")) @@ -83,8 +131,9 @@ public class GATKDoclet { testOnly = true; } - GATKDoclet doclet = new GATKDoclet(); - doclet.processDocs(rootDoc); + // process the docs + new GATKDoclet().processDocs(rootDoc); + return true; } @@ -104,17 +153,54 @@ public class GATKDoclet { return 0; } + /** + * Are we supposed to include @Hidden annotations in our documented output? + * @return + */ public boolean showHiddenFeatures() { return showHiddenFeatures; } - public static boolean testOnly() { - return testOnly; + /** + * + * @param rootDoc + */ + private void processDocs(RootDoc rootDoc) { + // setup the global access to the root + this.rootDoc = rootDoc; + + try { + // basic setup + DESTINATION_DIR.mkdirs(); + FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css")); + + /* ------------------------------------------------------------------- */ + /* You should do this ONLY ONCE in the whole application life-cycle: */ + + Configuration cfg = new Configuration(); + // Specify the data source where the template files come from. + cfg.setDirectoryForTemplateLoading(SETTINGS_DIR); + // Specify how templates will see the data-model. This is an advanced topic... 
+ cfg.setObjectWrapper(new DefaultObjectWrapper()); + + myWorkUnits = computeWorkUnits(); + for ( GATKDocWorkUnit workUnit : myWorkUnits ) { + processDocWorkUnit(cfg, workUnit); + } + + processIndex(cfg, new ArrayList(myWorkUnits)); + } catch ( FileNotFoundException e ) { + throw new RuntimeException(e); + } catch ( IOException e ) { + throw new RuntimeException(e); + } } - private static final List> testOnlyKeepers = Arrays.asList( - DocumentationTest.class, CommandLineGATK.class, UserException.class); - public Set workUnits() { + /** + * Returns the set of all GATKDocWorkUnits that we are going to generate docs for. + * @return + */ + private Set computeWorkUnits() { TreeSet m = new TreeSet(); for ( ClassDoc doc : rootDoc.classes() ) { @@ -144,37 +230,13 @@ public class GATKDoclet { return m; } - protected void processDocs(RootDoc rootDoc) { - // setup the global access to the root - this.rootDoc = rootDoc; - - try { - // basic setup - DESTINATION_DIR.mkdirs(); - FileUtils.copyFile(new File(SETTINGS_DIR + "/style.css"), new File(DESTINATION_DIR + "/style.css")); - - /* ------------------------------------------------------------------- */ - /* You should do this ONLY ONCE in the whole application life-cycle: */ - - Configuration cfg = new Configuration(); - // Specify the data source where the template files come from. - cfg.setDirectoryForTemplateLoading(SETTINGS_DIR); - // Specify how templates will see the data-model. This is an advanced topic... - cfg.setObjectWrapper(new DefaultObjectWrapper()); - - Set myWorkUnits = workUnits(); - for ( GATKDocWorkUnit workUnit : myWorkUnits ) { - processDocWorkUnit(cfg, workUnit, myWorkUnits); - } - - processIndex(cfg, new ArrayList(myWorkUnits)); - } catch ( FileNotFoundException e ) { - throw new RuntimeException(e); - } catch ( IOException e ) { - throw new RuntimeException(e); - } - } - + /** + * Create a handler capable of documenting the class doc according to feature. 
Returns + * null if no appropriate handler is found or doc shouldn't be documented at all. + * @param doc + * @param feature + * @return + */ private DocumentedGATKFeatureHandler createHandler(ClassDoc doc, DocumentedGATKFeatureObject feature) { if ( feature != null ) { if ( feature.enable() ) { @@ -189,6 +251,13 @@ public class GATKDoclet { return null; } + /** + * Returns the instantiated DocumentedGATKFeatureObject that describes the GATKDoc + * structure we will apply to Doc. + * + * @param doc + * @return null if this proves inappropriate or doc shouldn't be documented + */ private DocumentedGATKFeatureObject getFeatureForClassDoc(ClassDoc doc) { Class docClass = getClassForClassDoc(doc); @@ -208,6 +277,11 @@ public class GATKDoclet { } } + /** + * Return the Java class described by the ClassDoc doc + * @param doc + * @return + */ private Class getClassForClassDoc(ClassDoc doc) { try { // todo -- what do I need the ? extends Object to pass the compiler? @@ -223,10 +297,12 @@ public class GATKDoclet { } } - public static ClassDoc getClassDocForClass(RootDoc rootDoc, Class clazz) { - return rootDoc.classNamed(clazz.getName()); - } - + /** + * Create the html index listing all of the GATKDocs features + * @param cfg + * @param indexData + * @throws IOException + */ private void processIndex(Configuration cfg, List indexData) throws IOException { /* Get or create a template */ Template temp = cfg.getTemplate("generic.index.template.html"); @@ -241,6 +317,12 @@ public class GATKDoclet { } } + /** + * Helpful function to create the html index. Given all of the already run GATKDocWorkUnits, + * create the high-level grouping data listing individual features by group. 
+ * @param indexData + * @return + */ private Map groupIndexData(List indexData) { // // root -> data -> { summary -> y, filename -> z }, etc @@ -268,6 +350,11 @@ public class GATKDoclet { return root; } + /** + * Trivial helper routine that returns the map of name and summary given the annotation + * @param annotation + * @return + */ private static final Map toMap(DocumentedGATKFeatureObject annotation) { Map root = new HashMap(); root.put("name", annotation.groupName()); @@ -275,18 +362,39 @@ public class GATKDoclet { return root; } - public final static GATKDocWorkUnit findWorkUnitForClass(Class c, Set all) { - for ( final GATKDocWorkUnit unit : all ) + /** + * Helper function that finding the GATKDocWorkUnit associated with class from among all of the work units + * @param c the class we are looking for + * @return the GATKDocWorkUnit whose .clazz.equals(c), or null if none could be found + */ + public final GATKDocWorkUnit findWorkUnitForClass(Class c) { + for ( final GATKDocWorkUnit unit : this.myWorkUnits ) if ( unit.clazz.equals(c) ) return unit; return null; } - private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit, Set all) + /** + * Return the ClassDoc associated with clazz + * @param clazz + * @return + */ + public ClassDoc getClassDocForClass(Class clazz) { + return rootDoc.classNamed(clazz.getName()); + } + + /** + * High-level function that processes a single DocWorkUnit unit using its handler + * + * @param cfg + * @param unit + * @throws IOException + */ + private void processDocWorkUnit(Configuration cfg, GATKDocWorkUnit unit) throws IOException { //System.out.printf("Processing documentation for class %s%n", unit.classDoc); - unit.handler.processOne(rootDoc, unit, all); + unit.handler.processOne(unit); // Get or create a template Template temp = cfg.getTemplate(unit.handler.getTemplateName(unit.classDoc)); diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java 
b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 08e430c8a..4f1e95499 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -53,15 +53,15 @@ import java.util.*; public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { private static Logger logger = Logger.getLogger(GenericDocumentationHandler.class); + /** + * The max. length of the longest of --fullName -shortName argument name + * before we prefer the shorter option. + */ + private static final int MAX_DISPLAY_NAME = 30; + /** The Class we are documenting */ private GATKDocWorkUnit toProcess; - /** The set of all classes we are documenting, for cross-referencing */ - private Set all; - - /** The JavaDoc root */ - private RootDoc rootDoc; - @Override public boolean includeInDocs(ClassDoc doc) { try { @@ -79,10 +79,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } @Override - public void processOne(RootDoc rootDoc, GATKDocWorkUnit toProcessArg, Set allArg) { - this.rootDoc = rootDoc; + public void processOne(GATKDocWorkUnit toProcessArg) { this.toProcess = toProcessArg; - this.all = allArg; //System.out.printf("%s class %s%n", toProcess.group, toProcess.classDoc); Map root = new HashMap(); @@ -126,7 +124,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // add in all of the explicitly related items for ( final Class extraDocClass : toProcess.annotation.extraDocs() ) { - final GATKDocWorkUnit otherUnit = GATKDoclet.findWorkUnitForClass(extraDocClass, all); + final GATKDocWorkUnit otherUnit = getDoclet().findWorkUnitForClass(extraDocClass); if ( otherUnit == null ) throw new ReviewedStingException("Requested extraDocs for class without any documentation: " + extraDocClass); extraDocsData.add( @@ -388,6 +386,13 @@ public class 
GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return getFieldDoc(classDoc, name, true); } + /** + * Recursive helper routine to getFieldDoc() + * @param classDoc + * @param name + * @param primary + * @return + */ private FieldDoc getFieldDoc(ClassDoc classDoc, String name, boolean primary) { //System.out.printf("Looking for %s in %s%n", name, classDoc.name()); for ( FieldDoc fieldDoc : classDoc.fields(false) ) { @@ -422,7 +427,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return null; } - private static final int MAX_DISPLAY_NAME = 30; + /** + * Returns a Pair of (main, synonym) names for argument with fullName s1 and + * shortName s2. The main is selected to be the longest of the two, provided + * it doesn't exceed MAX_DISPLAY_NAME, in which case the shorter is taken. + * @param s1 + * @param s2 + * @return + */ Pair displayNames(String s1, String s2) { if ( s1 == null ) return new Pair(s2, null); if ( s2 == null ) return new Pair(s1, null); @@ -436,6 +448,15 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return new Pair(l, s); } + /** + * Returns a human readable string that describes the Type type of a GATK argument. + * + * This will include parameterized types, so that Set{T} shows up as Set(T) and not + * just Set in the docs. + * + * @param type + * @return + */ protected String argumentTypeString(Type type) { if (type instanceof ParameterizedType) { ParameterizedType parameterizedType = (ParameterizedType)type; @@ -454,6 +475,13 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { } } + /** + * Helper routine that returns the Feature.class required by a RodBinding, + * either T for RodBinding{T} or List{RodBinding{T}}. Returns null if + * the Type doesn't fit either model. 
+ * @param type + * @return + */ protected Class getFeatureTypeIfPossible(Type type) { if ( type instanceof ParameterizedType) { ParameterizedType paramType = (ParameterizedType)type; @@ -471,6 +499,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return null; } + /** + * High-level entry point for creating a FreeMarker map describing the GATK argument + * source with definition def, with associated javadoc fieldDoc. + * @param fieldDoc + * @param source + * @param def + * @return a non-null Map binding argument keys with their values + */ protected Map docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) { Map root = new HashMap(); Pair names = displayNames("-" + def.shortName, "--" + def.fullName); @@ -503,27 +539,29 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { root.put("summary", def.doc != null ? def.doc : ""); root.put("fulltext", fieldDoc.commentText()); + // What are our enum options? + if ( def.validOptions != null ) + root.put("options", docForEnumArgument(source.field.getType())); + + // general attributes List attributes = new ArrayList(); - // this one below is just too much. - //attributes.add(def.ioType.annotationClass.getSimpleName()); if ( def.required ) attributes.add("required"); - // flag is just boolean, not interesting - //if ( def.isFlag ) attributes.add("flag"); - if ( def.isHidden ) attributes.add("hidden"); if ( source.isDeprecated() ) attributes.add("depreciated"); if ( attributes.size() > 0 ) root.put("attributes", Utils.join(", ", attributes)); - if ( def.validOptions != null ) { - root.put("options", docForEnumArgument(source.field.getType())); - } - return root; } + /** + * Helper routine that provides a FreeMarker map for an enumClass, grabbing the + * values of the enum and their associated javadoc documentation. 
+ * @param enumClass + * @return + */ @Requires("enumClass.isEnum()") private List> docForEnumArgument(Class enumClass) { - ClassDoc doc = GATKDoclet.getClassDocForClass(rootDoc, enumClass); + ClassDoc doc = this.getDoclet().getClassDocForClass(enumClass); if ( doc == null ) // || ! doc.isEnum() ) throw new RuntimeException("Tried to get docs for enum " + enumClass + " but got instead: " + doc); @@ -537,5 +575,4 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return bindings; } - } diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java index d72d2e83c..645ab34c1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java @@ -32,14 +32,6 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils; import java.lang.reflect.Field; public class HelpUtils { - - protected static boolean implementsInterface(ProgramElementDoc classDoc, Class... 
interfaceClasses) { - for (Class interfaceClass : interfaceClasses) - if (assignableToClass(classDoc, interfaceClass, false)) - return true; - return false; - } - protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) { try { Class type = getClassForDoc(classDoc); From ff018c796423184d75cdbb11ff3fc90e9b3af67b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Aug 2011 16:55:56 -0400 Subject: [PATCH 11/11] Swapped argument order but not MD5 order --- .../sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java index ca3d1ee25..1565c419b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/ClipReadsWalkersIntegrationTest.java @@ -74,7 +74,7 @@ public class ClipReadsWalkersIntegrationTest extends WalkerTest { " -OQ -QT 4 -CR WRITE_Q0S" + " -o %s -os %s", 2, - Arrays.asList("55c01ccc2e84481b22d3632cdb06c8ba", "22db22749f811d30216215e047461621")); + Arrays.asList("22db22749f811d30216215e047461621", "55c01ccc2e84481b22d3632cdb06c8ba")); executeTest("clipOriginalQuals", spec); } }