Merged bug fix from Stable into Unstable

2012-08-01 14:35:30 -04:00 · 2012-08-01 14:35:30 -04:00 · 2be29ebd22
parent 52b93cab62 4093909a56
commit 2be29ebd22
3 changed files with 7 additions and 22 deletions
--- a/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
+++ b/public/R/scripts/org/broadinstitute/sting/utils/recalibration/BQSR.R
@@ -1,4 +1,5 @@
 library("ggplot2")
+library("tools") #For compactPDF in R 2.13+

 args <- commandArgs(TRUE)
 data <- read.csv(args[1])
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -58,14 +58,11 @@ import java.util.*;
 * to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
 * slightly lower quality level.
 *
- * <p>
- * See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
- *
 * <h2>Input</h2>
 * <p>
 * The input raw variants to be recalibrated.
 * <p>
- * The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
+ * The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
 * <p>
 * The tranches file that was generated by the VariantRecalibrator walker.
 *
@@ -82,6 +79,7 @@ import java.util.*;
 *   --ts_filter_level 99.0 \
 *   -tranchesFile path/to/output.tranches \
 *   -recalFile path/to/output.recal \
+ *   -mode SNP \
 *   -o path/to/output.recalibrated.filtered.vcf
 * </pre>
 *
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.R.RScriptExecutor;
 import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
 import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;

 import java.io.File;
 import java.io.FileNotFoundException;
@@ -73,15 +71,9 @@ import java.util.*;
 * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
 *
 * <p>
- * NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
- *
- * <p>
 * NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
 * See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
 *
- * <p>
- * See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
- *
 * <h2>Input</h2>
 * <p>
 * The input raw variants to be recalibrated.
@@ -90,7 +82,7 @@ import java.util.*;
 *
 * <h2>Output</h2>
 * <p>
- * A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
+ * A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
 * <p>
 * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
 *
@@ -102,8 +94,9 @@ import java.util.*;
 *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
 *   -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
 *   -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
- *   -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
- *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
+ *   -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
+ *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
+ *   -mode SNP \
 *   -recalFile path/to/output.recal \
 *   -tranchesFile path/to/output.tranches \
 *   -rscriptFile path/to/output.plots.R
@@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
    @Advanced
    @Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
    protected Boolean TRUST_ALL_POLYMORPHIC = false;
-    //@Hidden
-    //@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
-    //protected Boolean PERFORM_PROJECT_CONSENSUS = false;

    /////////////////////////////
    // Private Member Variables
@@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
                    // Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
                    dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
                    double priorFactor = QualityUtils.qualToProb( datum.prior );
-                    //if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
-                    //    final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
-                    //    priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
-                    //}
                    datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );

                    mapList.add( datum );