From ce73dc40712510a360738df49e802b4c21d95621 Mon Sep 17 00:00:00 2001
From: Christopher Hartl <chartl@broadinstitute.org>
Date: Thu, 15 Sep 2011 15:33:09 -0400
Subject: [PATCH 03/17] Update to the bindings for liftOverVCF.pl (to -V from
 -B)

---
 public/perl/liftOverVCF.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/public/perl/liftOverVCF.pl b/public/perl/liftOverVCF.pl
index 21cb8bb6b..ba4198292 100755
--- a/public/perl/liftOverVCF.pl
+++ b/public/perl/liftOverVCF.pl
@@ -36,7 +36,7 @@ my $unsorted_vcf = "$tmp_prefix.unsorted.vcf";
 
 # lift over the file
 print "Lifting over the vcf...";
-my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B:variant,vcf $in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
+my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -V:variant $in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
 if ($recordOriginalLocation) {
   $cmd .= " -recordOriginalLocation";
 }
@@ -66,7 +66,7 @@ system($cmd) == 0 or quit("The sorting step failed.  Please correct the necessar
 
 # Filter the VCF for bad records
 print "\nFixing/removing bad records...\n";
-$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B:variant,vcf $sorted_vcf -o $out";
+$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -V:variant $sorted_vcf -o $out";
 system($cmd) == 0 or quit("The filtering step failed.  Please correct the necessary errors before retrying.");
 
 # clean up

From f04e51c6c2b74a79644e9473230410a8ba85fe92 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 15 Sep 2011 15:38:56 -0400
Subject: [PATCH 04/17] Adding docs from Andrey since his repo was all screwed
 up.

---
 .../indels/SomaticIndelDetectorWalker.java    | 143 ++++++++++++------
 1 file changed, 94 insertions(+), 49 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
index e5ad3106d..8bba8eac2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
@@ -68,26 +68,59 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.io.*;
 import java.util.*;
 
+
 /**
+ * Tool for calling indels in Tumor-Normal paired sample mode; this tool supports single-sample mode as well,
+ * but this latter functionality is now superseded by UnifiedGenotyper.
+ *
+ * <p>
  * This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
- * data. Two output formats supported are: BED format (minimal output, required), and extended output that includes read
- * and mismtach statistics around the calls (tuned on with --verbose). The calls can be performed from a single/pooled sample,
- * or from a matched pair of samples (with --somatic option). In the latter case, two input bam files must be specified,
- * the order is important: indels are called from the second sample ("Tumor") and additionally annotated as germline
- * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
- * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
- * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
+ * data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
+ * include additional statistics such as mismatches and base qualities around the calls, read strandness (how many
+ * forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
+ * statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
+ * attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
+ * metrics for the post-processing tools to make the final decision). The calls are performed by default
+ * from a matched tumor-normal pair of samples. In this case, two (sets of) input bam files must be specified using tagged -I
+ * command line arguments: normal and tumor bam(s) must be passed with -I:normal and -I:tumor arguments,
+ * respectively. Indels are called from the tumor sample and annotated as germline
+ * if even weak evidence for the same indel, not necessarily a confident call, exists in the normal sample, or as somatic
+ * if normal sample has coverage at the site but no indication for an indel. Note that strictly speaking the calling
+ * is not even attempted in normal sample: if there is an indel in normal that is not detected/does not pass a threshold
+ * in tumor sample, it will not be reported.
  *
- * <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
- * please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
+ * To make indel calls and associated metrics for a single sample, this tool can be run with --unpaired flag (input
+ * bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
+ * on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
  *
+ * <h2>Input</h2>
+ * <p>
+ * Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * Indel calls with associated metrics.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T SomaticIndelDetector \
+ *   -o indels.vcf \
+ *   -verbose indels.txt \
+ *   -I:normal normal.bam \
+ *   -I:tumor tumor.bam
+ * </pre>
  *
  */
+
 @ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
 public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
 //    @Output
 //    PrintStream out;
-    @Output(doc="File to which variants should be written",required=true)
+    @Output(doc="File to write variants (indels) in VCF format",required=true)
     protected VCFWriter vcf_writer = null;
 
     @Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
@@ -102,68 +135,80 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
 
     @Hidden
     @Argument(fullName = "genotype_intervals", shortName = "genotype",
-            doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
+        doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or not", required = false)
     public String genotypeIntervalsFile = null;
 
     @Hidden
     @Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
-            doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
-                "if the list turns out to be unsorted, it will throw an exception.  "+
-                "Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
-                "to sort and keep it in memory (increases memory usage!).")
+        doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
+            "if the list turns out to be unsorted, it will throw an exception.  "+
+            "Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
+            "to sort and keep it in memory (increases memory usage!).")
     protected boolean GENOTYPE_NOT_SORTED = false;
 
     @Hidden
-	@Argument(fullName="unpaired", shortName="unpaired",
-			doc="Perform unpaired calls (no somatic status detection)", required=false)
+    @Argument(fullName="unpaired", shortName="unpaired",
+                    doc="Perform unpaired calls (no somatic status detection)", required=false)
     boolean call_unpaired = false;
-	boolean call_somatic ;
+    boolean call_somatic ;
 
-	@Argument(fullName="verboseOutput", shortName="verbose",
-			doc="Verbose output file in text format", required=false)
-	java.io.File verboseOutput = null;
+    @Argument(fullName="verboseOutput", shortName="verbose",
+                    doc="Verbose output file in text format", required=false)
+    java.io.File verboseOutput = null;
 
     @Argument(fullName="bedOutput", shortName="bed",
-            doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
+        doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
     java.io.File bedOutput = null;
 
-	@Argument(fullName="minCoverage", shortName="minCoverage",
-			doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
-	int minCoverage = 6;
+    @Argument(fullName="minCoverage", shortName="minCoverage",
+                    doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
+            "with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
+    int minCoverage = 6;
 
-	@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
-			doc="used only with --somatic;  normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
-	int minNormalCoverage = 4;
+    @Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
+                    doc="used only in default (somatic) mode;  normal sample must have at least minNormalCoverage "+
+            "or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
+    int minNormalCoverage = 4;
 
-	@Argument(fullName="minFraction", shortName="minFraction",
-			doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
-			" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
-	double minFraction = 0.3;
+    @Argument(fullName="minFraction", shortName="minFraction",
+                    doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
+                    " (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
+    double minFraction = 0.3;
 
-	@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
-			doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
-	double minConsensusFraction = 0.7;
+    @Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
+                    doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
+            "all indel observations at the site exceeds this threshold", required=false)
+    double minConsensusFraction = 0.7;
 
-	@Argument(fullName="minIndelCount", shortName="minCnt",
-			doc="Minimum count of reads supporting consensus indel required for making the call. "+
-			" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
-			"(minIndelCount not met) will not pass.", required=false)
-	int minIndelCount = 0;
+    @Argument(fullName="minIndelCount", shortName="minCnt",
+                    doc="Minimum count of reads supporting consensus indel required for making the call. "+
+                    " This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
+                    "(minIndelCount not met) will not pass.", required=false)
+    int minIndelCount = 0;
 
-	@Argument(fullName="refseq", shortName="refseq",
-			doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
-	String RefseqFileName = null;
+    @Argument(fullName="refseq", shortName="refseq",
+                    doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with "+
+            "GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
+    String RefseqFileName = null;
 
-    @Argument(fullName="blacklistedLanes", shortName="BL",
-            doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
-                    "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
-    PlatformUnitFilterHelper dummy;
-     @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
+//@Argument(fullName="blacklistedLanes", shortName="BL",
+//        doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
+//                "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
+//PlatformUnitFilterHelper dummy;
+
+    @Hidden
+    @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",
+            required=false) Boolean DEBUG = false;
     @Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
-            "May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
+            "May need to be increased to accomodate longer reads or longer deletions. A read can be fit into the "+
+            "window if its length on the reference (i.e. read length + length of deletion gap(s) if any) is smaller "+
+            "than the window size. Reads that do not fit will be ignored, so long deletions can not be called "+
+            "if window is too small",required=false) int WINDOW_SIZE = 200;
     @Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
                 " the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
 
+
+
 	private WindowContext tumor_context;
 	private WindowContext normal_context; 
 	private int currentContigIndex = -1;

From fe474b77f85f325ed20d6cb6c50dc298d024d03e Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:05:39 -0400
Subject: [PATCH 05/17] Updating docs so printing looks nicer

---
 .../gatk/walkers/variantutils/VariantValidationAssessor.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
index b98646270..ea8549474 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
@@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 import java.util.*;
 
 /**
- * Annotates a validation (from e.g. Sequenom) VCF with QC metrics (HW-equilibrium, % failed probes)
+ * Annotates a validation (from Sequenom for example) VCF with QC metrics (HW-equilibrium, % failed probes)
  *
  * <p>
  * The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).

From 4ef6a4598c3704fd5aac5f5302a148ddfedd3958 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:10:34 -0400
Subject: [PATCH 06/17] Updating docs to include output

---
 .../walkers/varianteval/VariantEvalWalker.java   | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
index 266b97af0..28f4f2a56 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@@ -56,6 +56,22 @@ import java.util.*;
  * <h2>Output</h2>
  * <p>
  * Evaluation tables detailing the results of the eval modules which were applied.
+ * For example:
+ * <pre>
+ * output.eval.gatkreport:
+ * ##:GATKReport.v0.1 CountVariants : Counts different classes of variants in the sample
+ * CountVariants  CompRod   CpG      EvalRod  JexlExpression  Novelty  nProcessedLoci  nCalledLoci  nRefLoci  nVariantLoci  variantRate ...
+ * CountVariants  dbsnp     CpG      eval     none            all      65900028        135770       0         135770        0.00206024  ...
+ * CountVariants  dbsnp     CpG      eval     none            known    65900028        47068        0         47068         0.00071423  ...
+ * CountVariants  dbsnp     CpG      eval     none            novel    65900028        88702        0         88702         0.00134601  ...
+ * CountVariants  dbsnp     all      eval     none            all      65900028        330818       0         330818        0.00502000  ...
+ * CountVariants  dbsnp     all      eval     none            known    65900028        120685       0         120685        0.00183133  ...
+ * CountVariants  dbsnp     all      eval     none            novel    65900028        210133       0         210133        0.00318866  ...
+ * CountVariants  dbsnp     non_CpG  eval     none            all      65900028        195048       0         195048        0.00295976  ...
+ * CountVariants  dbsnp     non_CpG  eval     none            known    65900028        73617        0         73617         0.00111710  ...
+ * CountVariants  dbsnp     non_CpG  eval     none            novel    65900028        121431       0         121431        0.00184265  ...
+ * ...
+ * </pre>
  * </p>
  *
  * <h2>Examples</h2>

From 6d02a34bfba1537f294f5a077b24702e539b87a5 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:17:54 -0400
Subject: [PATCH 07/17] Updating docs to include output

---
 .../variantutils/VariantValidationAssessor.java       | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
index ea8549474..8eaf976d0 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java
@@ -57,7 +57,16 @@ import java.util.*;
  *
  * <h2>Output</h2>
  * <p>
- * An annotated VCF.
+ * An annotated VCF.  Additionally, a table like the following will be output:
+ * <pre>
+ *     Total number of samples assayed:                  185
+ *     Total number of records processed:                152
+ *     Number of Hardy-Weinberg violations:              34 (22%)
+ *     Number of no-call violations:                     12 (7%)
+ *     Number of homozygous variant violations:          0 (0%)
+ *     Number of records passing all filters:            106 (69%)
+ *     Number of passing records that are polymorphic:   98 (92%)
+ * </pre>
  * </p>
  *
  * <h2>Examples</h2>

From fd1831b4a520e68b15b6b5b958aa2d04ade4e287 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:25:03 -0400
Subject: [PATCH 08/17] Updating docs to include more details

---
 .../gatk/walkers/fasta/FastaAlternateReferenceWalker.java   | 6 ++++--
 .../sting/gatk/walkers/fasta/FastaReferenceWalker.java      | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
index fd912334f..4e2c17bf6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
@@ -43,8 +43,10 @@ import java.util.List;
  * Generates an alternative reference sequence over the specified interval.
  *
  * <p>
- * Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
- * Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
+ * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
+ * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
+ * Note that if there are multiple variants at a site, it takes the first one seen.
+ * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
  *
  * <h2>Input</h2>
  * <p>
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
index 5f3b37cc8..7ae5c5c75 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
@@ -42,6 +42,9 @@ import java.io.PrintStream;
  *
  * <p>
  * The output format can be partially controlled using the provided command-line arguments.
+ * Specify intervals with the usual -L argument to output only the reference bases within your intervals.
+ * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
+ * separate fasta sequence (named numerically in order).
  *
  * <h2>Input</h2>
  * <p>

From 2f58fdb369a3cd4857281dd210427fac6352ca88 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:26:11 -0400
Subject: [PATCH 09/17] Adding expected output doc to CountCovariates

---
 .../recalibration/CountCovariatesWalker.java  | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
index 98c8950e3..1bdb70bdd 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@@ -76,6 +76,42 @@ import java.util.Map;
  * <h2>Output</h2>
  * <p>
  * A recalibration table file in CSV format that is used by the TableRecalibration walker.
+ * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.  
+ *
+ * The first 20 lines of such a file are shown below.
+ * * The file begins with a series of comment lines describing:
+ * ** The number of counted loci
+ * ** The number of counted bases
+ * ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
+ * 
+ * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.  
+ *
+ * * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
+ * depending on which covariates were specified at runtime. The last three items are the data, that is, the number of observations for this combination of covariates, the number of
+ * reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
+ * 
+ * <pre>
+ * # Counted Sites    19451059
+ * # Counted Bases    56582018
+ * # Skipped Sites    82666
+ * # Fraction Skipped 1 / 235 bp
+ * ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
+ * SRR006446,11,65,CA,9,1,10
+ * SRR006446,11,48,TA,10,0,40
+ * SRR006446,11,67,AA,27,0,40
+ * SRR006446,11,61,GA,11,1,10
+ * SRR006446,12,34,CA,47,1,17
+ * SRR006446,12,30,GA,52,1,17
+ * SRR006446,12,36,AA,352,1,25
+ * SRR006446,12,17,TA,182,11,12
+ * SRR006446,11,48,TG,2,0,40
+ * SRR006446,11,67,AG,1,0,40
+ * SRR006446,12,34,CG,9,0,40
+ * SRR006446,12,30,GG,43,0,40
+ * ERR001876,4,31,AG,1,0,40
+ * ERR001876,4,31,AT,2,2,1
+ * ERR001876,4,31,CA,1,0,40
+ * </pre>
  * </p>
  *
  * <h2>Examples</h2>

From 9dc6354130b23683c31a7b2c1ef8c2ed94da1946 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:55:24 -0400
Subject: [PATCH 10/17] Oops didn't mean to touch this test before

---
 .../gatk/walkers/varianteval/VariantEvalIntegrationTest.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index d8f7ad3b6..99622cbf6 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -42,7 +42,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                                         "-T VariantEval",
                                         "-R " + b37KGReference,
                                         "--dbsnp " + b37dbSNP132,
-                                        "--eval " + variantEvalTestDataRoot + "CEU.trio.callsForVE.vcf",
+                                        "--eval " + variantEvalTestDataRoot + "/CEU.trio.callsForVE.vcf",
                                         "-noEV",
                                         "-EV TiTvVariantEvaluator",
                                         "-ST Sample",

From d78e00e5b2cd5e8a1b1aa75209100b039e521442 Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Thu, 15 Sep 2011 16:09:07 -0400
Subject: [PATCH 11/17] Renaming VariantAnnotator SnpEff keys

This is to head off potential confusion with the output from the SnpEff tool itself,
which also uses a key named EFF.
---
 .../sting/gatk/walkers/annotator/SnpEff.java  | 90 ++++++++++---------
 .../stratifications/FunctionalClass.java      |  4 +-
 .../VariantAnnotatorIntegrationTest.java      |  2 +-
 .../VariantEvalIntegrationTest.java           |  2 +-
 4 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
index bb3685fb5..4ead77506 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
@@ -68,23 +68,31 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
     // Key names for the INFO field annotations we will add to each record, along
     // with parsing-related information:
     public enum InfoFieldKey {
-        EFF                   (-1),
-        EFF_IMPACT            (0),
-        EFF_CODON_CHANGE      (1),
-        EFF_AMINO_ACID_CHANGE (2),
-        EFF_GENE_NAME         (3),
-        EFF_GENE_BIOTYPE      (4),
-        EFF_TRANSCRIPT_ID     (6),
-        EFF_EXON_ID           (7);
+        EFFECT_KEY            ("SNPEFF_EFFECT",           -1),
+        IMPACT_KEY            ("SNPEFF_IMPACT",            0),
+        CODON_CHANGE_KEY      ("SNPEFF_CODON_CHANGE",      1),
+        AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
+        GENE_NAME_KEY         ("SNPEFF_GENE_NAME",         3),
+        GENE_BIOTYPE_KEY      ("SNPEFF_GENE_BIOTYPE",      4),
+        TRANSCRIPT_ID_KEY     ("SNPEFF_TRANSCRIPT_ID",     6),
+        EXON_ID_KEY           ("SNPEFF_EXON_ID",           7);
+
+        // Actual text of the key
+        private final String keyName;
 
         // Index within the effect metadata subfields from the SnpEff EFF annotation
         // where each key's associated value can be found during parsing.
         private final int fieldIndex;
 
-        InfoFieldKey ( int fieldIndex ) {
+        InfoFieldKey ( String keyName, int fieldIndex ) {
+            this.keyName = keyName;
             this.fieldIndex = fieldIndex;
         }
 
+        public String getKeyName() {
+            return keyName;
+        }
+
         public int getFieldIndex() {
             return fieldIndex;
         }
@@ -292,27 +300,27 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
     }
 
     public List<String> getKeyNames() {
-        return Arrays.asList( InfoFieldKey.EFF.toString(),
-                              InfoFieldKey.EFF_IMPACT.toString(),
-                              InfoFieldKey.EFF_CODON_CHANGE.toString(),
-                              InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(),
-                              InfoFieldKey.EFF_GENE_NAME.toString(),
-                              InfoFieldKey.EFF_GENE_BIOTYPE.toString(),
-                              InfoFieldKey.EFF_TRANSCRIPT_ID.toString(),
-                              InfoFieldKey.EFF_EXON_ID.toString()
+        return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
+                              InfoFieldKey.IMPACT_KEY.getKeyName(),
+                              InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
+                              InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
+                              InfoFieldKey.GENE_NAME_KEY.getKeyName(),
+                              InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
+                              InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
+                              InfoFieldKey.EXON_ID_KEY.getKeyName()
                             );
     }
 
     public List<VCFInfoHeaderLine> getDescriptions() {
         return Arrays.asList(
-            new VCFInfoHeaderLine(InfoFieldKey.EFF.toString(),                   1, VCFHeaderLineType.String,  "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_IMPACT.toString(),            1, VCFHeaderLineType.String,  "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_CODON_CHANGE.toString(),      1, VCFHeaderLineType.String,  "Old/New codon for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), 1, VCFHeaderLineType.String,  "Old/New amino acid for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_NAME.toString(),         1, VCFHeaderLineType.String,  "Gene name for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_BIOTYPE.toString(),      1, VCFHeaderLineType.String,  "Gene biotype for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_TRANSCRIPT_ID.toString(),     1, VCFHeaderLineType.String,  "Transcript ID for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(InfoFieldKey.EFF_EXON_ID.toString(),           1, VCFHeaderLineType.String,  "Exon ID for the highest-impact effect resulting from the current variant")
+            new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(),            1, VCFHeaderLineType.String,  "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
+            new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(),            1, VCFHeaderLineType.String,  "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
+            new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),      1, VCFHeaderLineType.String,  "Old/New codon for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String,  "Old/New amino acid for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(),         1, VCFHeaderLineType.String,  "Gene name for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),      1, VCFHeaderLineType.String,  "Gene biotype for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),     1, VCFHeaderLineType.String,  "Transcript ID for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(),           1, VCFHeaderLineType.String,  "Exon ID for the highest-impact effect resulting from the current variant")
         );
     }
 
@@ -375,16 +383,16 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
             }
 
             try {
-                impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]);
+                impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
             }
             catch ( IllegalArgumentException e ) {
-                parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]));
+                parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
             }
 
-            codonChange = effectMetadata[InfoFieldKey.EFF_CODON_CHANGE.getFieldIndex()];
-            aminoAcidChange = effectMetadata[InfoFieldKey.EFF_AMINO_ACID_CHANGE.getFieldIndex()];
-            geneName = effectMetadata[InfoFieldKey.EFF_GENE_NAME.getFieldIndex()];
-            geneBiotype = effectMetadata[InfoFieldKey.EFF_GENE_BIOTYPE.getFieldIndex()];
+            codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
+            aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
+            geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
+            geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
 
             if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
                 try {
@@ -398,8 +406,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
                 coding = EffectCoding.UNKNOWN;
             }
 
-            transcriptID = effectMetadata[InfoFieldKey.EFF_TRANSCRIPT_ID.getFieldIndex()];
-            exonID = effectMetadata[InfoFieldKey.EFF_EXON_ID.getFieldIndex()];
+            transcriptID = effectMetadata[InfoFieldKey.TRANSCRIPT_ID_KEY.getFieldIndex()];
+            exonID = effectMetadata[InfoFieldKey.EXON_ID_KEY.getFieldIndex()];
         }
 
         private void parseError ( String message ) {
@@ -443,14 +451,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
         public Map<String, Object> getAnnotations() {
             Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));
 
-            addAnnotation(annotations, InfoFieldKey.EFF.toString(), effect.toString());
-            addAnnotation(annotations, InfoFieldKey.EFF_IMPACT.toString(), impact.toString());
-            addAnnotation(annotations, InfoFieldKey.EFF_CODON_CHANGE.toString(), codonChange);
-            addAnnotation(annotations, InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), aminoAcidChange);
-            addAnnotation(annotations, InfoFieldKey.EFF_GENE_NAME.toString(), geneName);
-            addAnnotation(annotations, InfoFieldKey.EFF_GENE_BIOTYPE.toString(), geneBiotype);
-            addAnnotation(annotations, InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), transcriptID);
-            addAnnotation(annotations, InfoFieldKey.EFF_EXON_ID.toString(), exonID);
+            addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
+            addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
+            addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
+            addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
+            addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
+            addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
+            addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
+            addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
 
             return annotations;
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java
index a32857ffc..88ffcaaeb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java
@@ -62,8 +62,8 @@ public class FunctionalClass extends VariantStratifier {
                     annotationId++;
                 } while (eval.hasAttribute(key));
 
-            } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFF.name() ) ) {
-                SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFF.name()).toString());
+            } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) {
+                SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString());
                 if ( snpEffType == SnpEff.EffectType.STOP_GAINED )
                     type = FunctionalType.nonsense;
                 else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING )
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index f902ce276..08baae7a7 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
             validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile  " + validationDataLocation +
             "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
             1,
-            Arrays.asList("a1c3ba9efc28ee0606339604095076ea")
+            Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9")
         );
         executeTest("Testing SnpEff annotations", spec);
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index 99622cbf6..b90e6d0ff 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -32,7 +32,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
                                 1,
                                 Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
                               );
-        executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
+        executeTest("testFunctionClassWithSnpeff", spec);
     }
 
     @Test

From e6e9b08c9a47640f9be32b47f495174118636a5c Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Thu, 15 Sep 2011 18:51:09 -0400
Subject: [PATCH 13/17] Must provide alleles VCF to UGCallVariants

---
 .../sting/gatk/walkers/genotyper/UGCallVariants.java             | 1 -
 1 file changed, 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
index 500b11360..d88e55687 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
@@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.SampleUtils;

From 9fdf1f8eb663858cacafd8fb339d098cdce4b96d Mon Sep 17 00:00:00 2001
From: Christopher Hartl <chartl@broadinstitute.org>
Date: Thu, 15 Sep 2011 21:05:22 -0400
Subject: [PATCH 14/17] Fix some doc formatting for Depth of Coverage

---
 .../gatk/walkers/coverage/DepthOfCoverageWalker.java     | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
index 3a18fe610..86f97a36c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
@@ -69,14 +69,23 @@ import java.util.*;
  * <h2>Output</h2>
  * <p>
  * Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
+ * </p><p>
  *  - no suffix: per locus coverage
+ * </p><p>
  *  - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
+ * </p><p>
  *  - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
+ * </p><p>
  *  - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
+ * </p><p>
  *  - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
+ * </p><p>
  *  - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
+ * </p><p>
  *  - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
+ * </p><p>
  *  - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
+ * </p><p>
  *  - _cumulative_coverage_proportions: proprotions of loci with >= X coverage, aggregated over all bases
  * </p>
  *

From 939babc820cc5174a1d97a8b6bdb992ca6cedc09 Mon Sep 17 00:00:00 2001
From: Christopher Hartl <chartl@broadinstitute.org>
Date: Thu, 15 Sep 2011 21:05:51 -0400
Subject: [PATCH 15/17] Updating formatting for ValidationAmplicons GATK docs

---
 .../sting/gatk/walkers/validation/ValidationAmplicons.java  | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
index 01e8cd321..48cba6a1a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@@ -61,7 +61,7 @@ import java.util.List;
  * CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
  *</pre>
  * are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
- *
+ *<pre>
  * Valid                     // amplicon is valid
  * SITE_IS_FILTERED=1        // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
  * VARIANT_TOO_NEAR_PROBE=1  // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
@@ -72,10 +72,10 @@ import java.util.List;
  * END_TOO_CLOSE,            // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
  * NO_VARIANTS_FOUND,        // no variants found within the amplicon region
  * INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
- * </p>
+ * </pre></p>
  *
  * <h2>Examples</h2>
- * <pre></pre>
+ * <pre>
  *    java
  *      -jar GenomeAnalysisTK.jar
  *      -T ValidationAmplicons

From 33967a4e0c09e85cc4dc1d0eb83fe6feef80c46d Mon Sep 17 00:00:00 2001
From: Khalid Shakir <kshakir@broadinstitute.org>
Date: Fri, 16 Sep 2011 12:46:07 -0400
Subject: [PATCH 17/17] Fixed issue reported by chartl where cloned functions
 lost tags on @Inputs. Updated ExampleUnifiedGenotyper.scala with new syntax.

---
 .../examples/ExampleUnifiedGenotyper.scala    |  6 +--
 .../sting/queue/extensions/gatk/RodBind.scala |  2 +-
 .../queue/extensions/gatk/TaggedFile.scala    |  2 +-
 .../sting/queue/function/QFunction.scala      | 16 +-------
 .../{function => util}/FileExtension.scala    |  2 +-
 .../sting/queue/util/IOUtils.scala            | 40 ++++++++++++++-----
 6 files changed, 36 insertions(+), 32 deletions(-)
 rename public/scala/src/org/broadinstitute/sting/queue/{function => util}/FileExtension.scala (89%)

diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
index 1d473b210..9bddfd97c 100644
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala
@@ -56,15 +56,15 @@ class ExampleUnifiedGenotyper extends QScript {
     genotyper.input_file :+= qscript.bamFile
     genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf")
 
-    evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out)
+    evalUnfiltered.eval :+= genotyper.out
     evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
 
-    variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out)
+    variantFilter.variant = genotyper.out
     variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
     variantFilter.filterName = filterNames
     variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"")
 
-    evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out)
+    evalFiltered.eval :+= variantFilter.out
     evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval")
 
     add(genotyper, evalUnfiltered)
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala
index 42f63e225..b4c5d91d3 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala
@@ -1,7 +1,7 @@
 package org.broadinstitute.sting.queue.extensions.gatk
 
 import java.io.File
-import org.broadinstitute.sting.queue.function.FileExtension
+import org.broadinstitute.sting.queue.util.FileExtension
 import java.lang.String
 
 /**
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala
index ed8158b49..b19f9e430 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala
@@ -1,7 +1,7 @@
 package org.broadinstitute.sting.queue.extensions.gatk
 
 import java.io.File
-import org.broadinstitute.sting.queue.function.FileExtension
+import org.broadinstitute.sting.queue.util.FileExtension
 
 /**
  * Used to provide tagged -I input_file arguments to the GATK.
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
index c905581fa..500f7b200 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala
@@ -387,25 +387,11 @@ trait QFunction extends Logging with QJobReport {
    */
   protected def canon(value: Any) = {
     value match {
-      case fileExtension: FileExtension =>
-        val newFile = absolute(fileExtension);
-        val newFileExtension = fileExtension.withPath(newFile.getPath)
-        newFileExtension
-      case file: File =>
-        if (file.getClass != classOf[File])
-          throw new QException("Extensions of file must also extend with FileExtension so that the path can be modified.");
-        absolute(file)
+      case file: File => IOUtils.absolute(commandDirectory, file)
       case x => x
     }
   }
 
-  /**
-   * Returns the absolute path to the file relative to the run directory and the job command directory.
-   * @param file File to root relative to the command directory if it is not already absolute.
-   * @return The absolute path to file.
-   */
-  private def absolute(file: File) = IOUtils.absolute(commandDirectory, file)
-
   /**
    * Scala sugar type for checking annotation required and exclusiveOf.
    */
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/FileExtension.scala b/public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala
similarity index 89%
rename from public/scala/src/org/broadinstitute/sting/queue/function/FileExtension.scala
rename to public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala
index e2394a5bf..9b6e52c8e 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/FileExtension.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala
@@ -1,4 +1,4 @@
-package org.broadinstitute.sting.queue.function
+package org.broadinstitute.sting.queue.util
 
 import java.io.File
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala
index 79ffa8cb9..b17ccc0d5 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala
@@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.util
 import org.apache.commons.io.FileUtils
 import java.io.{FileReader, File}
 import org.broadinstitute.sting.utils.exceptions.UserException
+import org.broadinstitute.sting.queue.QException
 
 /**
  * A collection of utilities for modifying java.io.
@@ -12,7 +13,7 @@ object IOUtils extends Logging {
    * Checks if the temp directory has been setup and throws an exception if they user hasn't set it correctly.
    * @param tempDir Temporary directory.
    */
-  def checkTempDir(tempDir: File) = {
+  def checkTempDir(tempDir: File) {
     val tempDirPath = tempDir.getAbsolutePath
     // Keeps the user from leaving the temp directory as the default, and on Macs from having pluses
     // in the path which can cause problems with the Google Reflections library.
@@ -20,7 +21,7 @@ object IOUtils extends Logging {
     if (tempDirPath.startsWith("/var/folders/") || (tempDirPath == "/tmp") || (tempDirPath == "/tmp/"))
       throw new UserException.BadTmpDir("java.io.tmpdir must be explicitly set")
     if (!tempDir.exists && !tempDir.mkdirs)
-      throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath())
+      throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath)
   }
 
   /**
@@ -35,9 +36,9 @@ object IOUtils extends Logging {
        throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent)
     val temp = File.createTempFile(prefix + "-", suffix, tempDirParent)
     if (!temp.delete)
-      throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath())
+      throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath)
     if (!temp.mkdir)
-      throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath())
+      throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath)
     absolute(temp)
   }
 
@@ -46,7 +47,7 @@ object IOUtils extends Logging {
    * @param file File to write to.
    * @param content Content to write.
    */
-  def writeContents(file: File, content: String) =  FileUtils.writeStringToFile(file, content)
+  def writeContents(file: File, content: String) { FileUtils.writeStringToFile(file, content) }
 
   /**
    * Reads content of a file into a string.
@@ -146,10 +147,12 @@ object IOUtils extends Logging {
    * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path.
    */
   def absolute(parent: File, file: File): File = {
-    if (file.isAbsolute)
-      absolute(file)
-    else
-      absolute(new File(parent, file.getPath))
+    val newPath =
+      if (file.isAbsolute)
+        absolutePath(file)
+      else
+        absolutePath(new File(parent, file.getPath))
+    replacePath(file, newPath)
   }
 
   /**
@@ -159,12 +162,16 @@ object IOUtils extends Logging {
    * @return the absolute path to the file.
    */
   def absolute(file: File) = {
+    replacePath(file, absolutePath(file))
+  }
+
+  private def absolutePath(file: File) = {
     var fileAbs = file.getAbsoluteFile
     var names = List.empty[String]
     while (fileAbs != null) {
       val name = fileAbs.getName
       fileAbs = fileAbs.getParentFile
-      
+
       if (name == ".") {
         /* skip */
 
@@ -190,7 +197,18 @@ object IOUtils extends Logging {
       }
     }
 
-    new File(names.mkString("/", "/", ""))
+    names.mkString("/", "/", "")
+  }
+
+  private def replacePath(file: File, path: String) = {
+    file match {
+      case fileExtension: FileExtension =>
+        fileExtension.withPath(path)
+      case file: File =>
+        if (file.getClass != classOf[File])
+          throw new QException("Sub classes of java.io.File must also implement FileExtension so that the path can be modified.")
+        new File(path)
+    }
   }
 
   /**