Merge pull request #795 from broadinstitute/ldg_VQSRcmdlineOutput
VQSR VCF header command line now contains annotations and tranche levels
This commit is contained in:
commit
ef32c44688
|
|
@ -165,12 +165,12 @@ public class TrancheManager {
|
|||
}
|
||||
}
|
||||
|
||||
public static List<Tranche> findTranches( final List<VariantDatum> data, final double[] tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||
/**
 * Convenience overload: forwards all arguments to the five-argument findTranches, passing null for debugFile.
 */
public static List<Tranche> findTranches( final List<VariantDatum> data, final List<Double> tranches, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model ) {
|
||||
return findTranches( data, tranches, metric, model, null );
|
||||
}
|
||||
|
||||
public static List<Tranche> findTranches( final List<VariantDatum> data, final double[] trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
|
||||
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.length, data.size()));
|
||||
public static List<Tranche> findTranches( final List<VariantDatum> data, final List<Double> trancheThresholds, final SelectionMetric metric, final VariantRecalibratorArgumentCollection.Mode model, final File debugFile ) {
|
||||
logger.info(String.format("Finding %d tranches for %d variants", trancheThresholds.size(), data.size()));
|
||||
|
||||
Collections.sort( data, new VariantDatum.VariantDatumLODComparator() );
|
||||
metric.calculateRunningMetric(data);
|
||||
|
|
|
|||
|
|
@ -216,7 +216,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
* See the input VCF file's INFO field for a list of all available annotations.
|
||||
*/
|
||||
@Argument(fullName="use_annotation", shortName="an", doc="The names of the annotations which should used for calculations", required=true)
|
||||
private String[] USE_ANNOTATIONS = null;
|
||||
private List<String> USE_ANNOTATIONS = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Add truth sensitivity slices through the call set at the given values. The default values are 100.0, 99.9, 99.0, and 90.0
|
||||
|
|
@ -224,12 +224,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
* sites in the truth set), a 99.9% truth sensitivity tranche, along with progressively smaller tranches at 99% and 90%.
|
||||
*/
|
||||
@Argument(fullName="TStranche", shortName="tranche", doc="The levels of truth sensitivity at which to slice the data. (in percent, that is 1.0 for 1 percent)", required=false)
|
||||
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
|
||||
private List<Double> TS_TRANCHES = new ArrayList<Double>(Arrays.asList(100.0, 99.9, 99.0, 90.0));
|
||||
/**
|
||||
* For this to work properly, the -ignoreFilter argument should also be applied to the ApplyRecalibration command.
|
||||
*/
|
||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified, the variant recalibrator will also use variants marked as filtered by the specified filter name in the input VCF file", required=false)
|
||||
private String[] IGNORE_INPUT_FILTERS = null;
|
||||
private List<String> IGNORE_INPUT_FILTERS = new ArrayList<String>();
|
||||
@Argument(fullName="ignore_all_filters", shortName="ignoreAllFilters", doc="If specified, the variant recalibrator will ignore all input filters. Useful to rerun the VQSR from a filtered output file.", required=false)
|
||||
private boolean IGNORE_ALL_FILTERS = false;
|
||||
@Output(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false, defaultToStdout=false)
|
||||
|
|
@ -263,7 +263,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
|
||||
@Override
|
||||
public void initialize() {
|
||||
dataManager = new VariantDataManager( new ArrayList<>(Arrays.asList(USE_ANNOTATIONS)), VRAC );
|
||||
dataManager = new VariantDataManager( new ArrayList<>(USE_ANNOTATIONS), VRAC );
|
||||
|
||||
if (RSCRIPT_FILE != null && !RScriptExecutor.RSCRIPT_EXISTS)
|
||||
Utils.warnUser(logger, String.format(
|
||||
|
|
@ -271,7 +271,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
RSCRIPT_FILE));
|
||||
|
||||
if( IGNORE_INPUT_FILTERS != null ) {
|
||||
ignoreInputFilterSet.addAll( Arrays.asList(IGNORE_INPUT_FILTERS) );
|
||||
ignoreInputFilterSet.addAll( IGNORE_INPUT_FILTERS );
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
@ -428,7 +428,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
dataManager.writeOutRecalibrationTable( recalWriter );
|
||||
if( RSCRIPT_FILE != null ) {
|
||||
logger.info( "Writing out visualization Rscript file...");
|
||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData() ), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.length]) );
|
||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 1000, positiveTrainingData, negativeTrainingData, dataManager.getEvaluationData() ), goodModel, badModel, 0.0, dataManager.getAnnotationKeys().toArray(new String[USE_ANNOTATIONS.size()]) );
|
||||
}
|
||||
|
||||
if(VRAC.MODE == VariantRecalibratorArgumentCollection.Mode.INDEL) {
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
|
||||
|
||||
private final File QUAL_DATA = new File(privateTestDir + "tranches.raw.dat");
|
||||
private final double[] TRUTH_SENSITIVITY_CUTS = new double[]{99.9, 99.0, 97.0, 95.0};
|
||||
private final List<Double> TRUTH_SENSITIVITY_CUTS = new ArrayList<Double>(Arrays.asList(99.9, 99.0, 97.0, 95.0));
|
||||
private final File EXPECTED_TRANCHES_NEW = new File(privateTestDir + "tranches.6.txt");
|
||||
private final File EXPECTED_TRANCHES_OLD = new File(privateTestDir + "tranches.4.txt");
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
|
||||
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, List<Double> tranches) {
|
||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
|
||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||
return TrancheManager.findTranches(vd, tranches, metric, VariantRecalibratorArgumentCollection.Mode.SNP);
|
||||
|
|
@ -153,6 +153,6 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
@Test(expectedExceptions = {UserException.class})
|
||||
public final void testBadFDR() {
|
||||
ArrayList<VariantDatum> vd = readData();
|
||||
List<Tranche> tranches = findMyTranches(vd, new double[]{-1});
|
||||
List<Tranche> tranches = findMyTranches(vd, new ArrayList<Double>(Arrays.asList(-1.0)));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue