Merged bug fix from Stable into Unstable

This commit is contained in:
Ryan Poplin 2012-08-01 14:35:30 -04:00
commit 2be29ebd22
3 changed files with 7 additions and 22 deletions

View File

@ -1,4 +1,5 @@
library("ggplot2")
library("tools") #For compactPDF in R 2.13+
args <- commandArgs(TRUE)
data <- read.csv(args[1])

View File

@ -58,14 +58,11 @@ import java.util.*;
* to the desired level but also has the information necessary to pull out more variants for a higher sensitivity but a
* slightly lower quality level.
*
* <p>
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki</a> for a tutorial and example recalibration accuracy plots.
*
* <h2>Input</h2>
* <p>
* The input raw variants to be recalibrated.
* <p>
* The recalibration table file in CSV format that was generated by the VariantRecalibrator walker.
* The recalibration table file in VCF format that was generated by the VariantRecalibrator walker.
* <p>
* The tranches file that was generated by the VariantRecalibrator walker.
*
@ -82,6 +79,7 @@ import java.util.*;
* --ts_filter_level 99.0 \
* -tranchesFile path/to/output.tranches \
* -recalFile path/to/output.recal \
* -mode SNP \
* -o path/to/output.recalibrated.filtered.vcf
* </pre>
*

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
@ -48,7 +47,6 @@ import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
import java.io.File;
import java.io.FileNotFoundException;
@ -73,15 +71,9 @@ import java.util.*;
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
*
* <p>
* NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
*
* <p>
* NOTE: To create the model reporting plots, Rscript must be in your environment PATH (Rscript is the scripting version of R, not the interactive version).
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
*
* <p>
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki</a> for a tutorial and example recalibration accuracy plots.
*
* <h2>Input</h2>
* <p>
* The input raw variants to be recalibrated.
@ -90,7 +82,7 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
* A recalibration table file in VCF format that is used by the ApplyRecalibration walker.
* <p>
* A tranches file that shows various metrics of the recalibration callset for several slices through the data.
*
@ -102,8 +94,9 @@ import java.util.*;
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
* -resource:dbsnp,known=true,training=false,truth=false,prior=6.0 dbsnp_135.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ -an InbreedingCoeff \
* -mode SNP \
* -recalFile path/to/output.recal \
* -tranchesFile path/to/output.tranches \
* -rscriptFile path/to/output.plots.R
@ -187,9 +180,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Advanced
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
protected Boolean TRUST_ALL_POLYMORPHIC = false;
//@Hidden
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
/////////////////////////////
// Private Member Variables
@ -268,10 +258,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Loop through the training data sets and, if they overlap this locus, update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
double priorFactor = QualityUtils.qualToProb( datum.prior );
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
//}
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
mapList.add( datum );