Merge pull request #1392 from broadinstitute/gvda_vqsr_retries_mdp

Ability to retry building VQSR model (contributed by mdp)
This commit is contained in:
Geraldine Van der Auwera 2016-05-31 21:35:27 -04:00
commit 801aa49d25
1 changed file with 72 additions and 44 deletions

View File

@ -266,9 +266,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private File RSCRIPT_FILE = null; private File RSCRIPT_FILE = null;
/** /**
* This GATKReport gives information to describe the VQSR model fit. Normalized means for the positive model are concatenated as one table and negative model normalized means as another table. * This GATKReport gives information to describe the VQSR model fit. Normalized means for the positive model are
* Covariances are also concatenated for postive and negative models, respectively. Tables of annotation means and standard deviations are provided to help describe the normalization. * concatenated as one table and negative model normalized means as another table. Covariances are also concatenated
* The model fit report can be read in with our R gsalib package. Individual model Gaussians can be subset by the value in the "Gaussian" column if desired. * for positive and negative models, respectively. Tables of annotation means and standard deviations are provided
* to help describe the normalization. The model fit report can be read in with our R gsalib package. Individual
* model Gaussians can be subset by the value in the "Gaussian" column if desired.
*/ */
@Argument(fullName="output_model", shortName = "outputModel", doc="If specified, the variant recalibrator will output the VQSR model fit to the file specified by -modelFile or to stdout", required=false) @Argument(fullName="output_model", shortName = "outputModel", doc="If specified, the variant recalibrator will output the VQSR model fit to the file specified by -modelFile or to stdout", required=false)
private boolean outputModel = false; private boolean outputModel = false;
@ -280,6 +282,21 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
protected int REPLICATE = 200; protected int REPLICATE = 200;
private ArrayList<Double> replicate = new ArrayList<>(); private ArrayList<Double> replicate = new ArrayList<>();
/**
* The statistical model being built by this tool may fail due to simple statistical sampling
* issues. Rather than dying immediately when the initial model fails, this argument allows the
* tool to restart with a different random seed and try to build the model again. The first
* successfully built model will be kept.
*
* Note that the most common underlying cause of model-building failure is insufficient data to
* build a robust model. This argument provides a workaround for that issue, but it is
* preferable to provide this tool with more data (typically by including more samples or more territory)
* in order to generate a more robust model.
*/
@Advanced
@Argument(fullName="max_attempts", shortName = "max_attempts", doc="Number of attempts to build a model before failing", required=false)
protected int max_attempts = 1;
///////////////////////////// /////////////////////////////
// Debug Arguments // Debug Arguments
///////////////////////////// /////////////////////////////
@ -457,6 +474,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Override @Override
public void onTraversalDone( final ExpandingArrayList<VariantDatum> reduceSum ) { public void onTraversalDone( final ExpandingArrayList<VariantDatum> reduceSum ) {
for (int i = 1; i <= max_attempts; i++) {
try {
dataManager.setData(reduceSum); dataManager.setData(reduceSum);
dataManager.normalizeData(); // Each data point is now (x - mean) / standard deviation dataManager.normalizeData(); // Each data point is now (x - mean) / standard deviation
@ -507,6 +526,15 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
logger.info("Executing: " + executor.getApproximateCommandLine()); logger.info("Executing: " + executor.getApproximateCommandLine());
executor.exec(); executor.exec();
} }
return;
} catch (Exception e) {
if (i == max_attempts) {
throw e;
} else {
logger.info(String.format("Exception occurred on attempt %d of %d. Trying again. Message was: '%s'", i, max_attempts, e.getMessage()));
}
}
}
} }
protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List<String> annotationList) { protected GATKReport writeModelReport(final GaussianMixtureModel goodModel, final GaussianMixtureModel badModel, List<String> annotationList) {