2025-11-23 23:03:37 +08:00
|
|
|
#include <spdlog/cfg/env.h>
|
|
|
|
|
#include <spdlog/sinks/stdout_color_sinks.h>
|
|
|
|
|
#include <spdlog/spdlog.h>
|
|
|
|
|
#include <time.h>
|
|
|
|
|
|
|
|
|
|
#include <argparse/argparse.hpp>
|
|
|
|
|
#include <iostream>
|
|
|
|
|
#include <map>
|
|
|
|
|
#include <set>
|
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
|
|
#include "fastbqsr_version.h"
|
|
|
|
|
#include "bqsr/bqsr_args.h"
|
|
|
|
|
#include "util/profiling.h"
|
|
|
|
|
|
|
|
|
|
namespace nsgv {
|
|
|
|
|
extern BQSRArg gBqsrArg;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
int BaseRecalibrator();
|
|
|
|
|
int ApplyBQSR();
|
|
|
|
|
|
|
|
|
|
/**
 * Formats the current local time as a human-readable string, e.g.
 * "November 23, 2025 at 11:03:37 PM CST" (format "%B %d, %Y at %I:%M:%S %p %Z").
 *
 * @return the formatted timestamp, or an empty string if the local time
 *         cannot be determined or does not fit in the internal buffer.
 *
 * Note: relies on localtime(), which returns a pointer to shared static
 * storage; do not call concurrently with other users of localtime().
 */
std::string get_current_time_str() {
    time_t now_secs = 0;
    time(&now_secs);

    // localtime() may fail and return a null pointer; never hand that to strftime().
    const struct tm *local = localtime(&now_secs);
    if (local == nullptr) {
        return std::string();
    }

    char text[80];
    // strftime() returns the number of characters written (excluding the
    // terminator), or 0 when the result does not fit, in which case the buffer
    // contents are unspecified. Build the string from the reported length only.
    const size_t written = strftime(text, sizeof(text), "%B %d, %Y at %I:%M:%S %p %Z", local);
    return std::string(text, written);
}
|
|
|
|
|
|
|
|
|
|
int main_BaseRecalibrator(int argc, char *argv[]) {
|
|
|
|
|
// init arg parser
|
|
|
|
|
argparse::ArgumentParser program(nsgv::gBqsrArg.PROGRAM_RECORD_ID, FASTBQSR_VERSION, argparse::default_arguments::none);
|
|
|
|
|
program.add_description(
|
|
|
|
|
"First pass of the Base Quality Score Recalibration (BQSR) -- Generates recalibration table based on various\n"
|
|
|
|
|
"user-specified covariates (such as read group, reported quality score, machine cycle, and nucleotide context.)");
|
|
|
|
|
|
|
|
|
|
program.add_argument("--input")
|
|
|
|
|
.help("BAM/SAM/CRAM file containing reads This argument must be specified at least once.")
|
|
|
|
|
.metavar("<INPUT>")
|
|
|
|
|
.required();
|
|
|
|
|
|
|
|
|
|
program.add_argument("--output")
|
|
|
|
|
.help("The output recalibration table file to create.")
|
|
|
|
|
.metavar("<OUTPUT>")
|
|
|
|
|
.required();
|
|
|
|
|
|
|
|
|
|
program.add_argument("--reference")
|
|
|
|
|
.help("Reference sequence file.")
|
|
|
|
|
.metavar("<Reference>")
|
|
|
|
|
.required();
|
|
|
|
|
|
|
|
|
|
program.add_argument("--num-threads")
|
|
|
|
|
.help("Number of threads to use.")
|
|
|
|
|
.scan<'i', int>()
|
|
|
|
|
.default_value(1)
|
|
|
|
|
.nargs(1)
|
|
|
|
|
.metavar("<NUM_THREADS>");
|
|
|
|
|
|
|
|
|
|
program.add_argument("--known-sites")
|
|
|
|
|
.help(
|
|
|
|
|
"One or more databases of known polymorphic sites used to exclude regions around known polymorphisms from "
|
|
|
|
|
"analysis. This argument must be specified at least once.")
|
|
|
|
|
.metavar("<KnownSites>")
|
2025-12-04 22:26:13 +08:00
|
|
|
.nargs(argparse::nargs_pattern::any);
|
2025-11-23 23:03:37 +08:00
|
|
|
|
|
|
|
|
program.add_argument("--create-index")
|
|
|
|
|
.help("Whether to create an index when writing coordinate sorted BAM output.")
|
|
|
|
|
.default_value(false)
|
|
|
|
|
.implicit_value(true);
|
|
|
|
|
|
|
|
|
|
program.add_argument("--index-format")
|
|
|
|
|
.help("Format for bam index file. Possible values: {BAI, CSI}")
|
|
|
|
|
.default_value(std::string("BAI"))
|
|
|
|
|
.choices("BAI", "CSI")
|
|
|
|
|
.nargs(1)
|
|
|
|
|
.metavar("<IndexFormat>");
|
|
|
|
|
|
|
|
|
|
// add help and version args
|
|
|
|
|
program.add_argument("-h", "--help")
|
|
|
|
|
.action([&](const auto & /*unused*/) {
|
|
|
|
|
std::cout << program.help().str();
|
|
|
|
|
std::exit(0);
|
|
|
|
|
})
|
|
|
|
|
.default_value(false)
|
|
|
|
|
.help("shows help message and exits")
|
|
|
|
|
.implicit_value(true)
|
|
|
|
|
.nargs(0);
|
|
|
|
|
|
|
|
|
|
program.add_argument("-v", "--version")
|
|
|
|
|
.action([&](const auto & /*unused*/) {
|
|
|
|
|
std::cout << FASTBQSR_VERSION << std::endl;
|
|
|
|
|
std::exit(0);
|
|
|
|
|
})
|
|
|
|
|
.default_value(false)
|
|
|
|
|
.help("prints version information and exits")
|
|
|
|
|
.implicit_value(true)
|
|
|
|
|
.nargs(0);
|
|
|
|
|
|
|
|
|
|
// std::cout << program << std::endl;
|
|
|
|
|
|
|
|
|
|
nsgv::gBqsrArg.START_TIME = get_current_time_str();
|
|
|
|
|
nsgv::gBqsrArg.CLI_STR = argv[0];
|
|
|
|
|
for (int i = 1; i < argc; ++i) {
|
|
|
|
|
nsgv::gBqsrArg.CLI_STR += " " + std::string(argv[i]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
program.parse_args(argc, argv);
|
|
|
|
|
nsgv::gBqsrArg.INPUT_FILE = program.get("--input");
|
|
|
|
|
nsgv::gBqsrArg.OUTPUT_FILE = program.get("--output");
|
|
|
|
|
nsgv::gBqsrArg.NUM_THREADS = program.get<int>("--num-threads");
|
|
|
|
|
nsgv::gBqsrArg.CREATE_INDEX = program.get<bool>("--create-index");
|
|
|
|
|
|
2025-12-04 22:26:13 +08:00
|
|
|
nsgv::gBqsrArg.KNOWN_SITES_VCFS = program.get<std::vector<string>>("--known-sites");
|
|
|
|
|
// spdlog::info("known sites vcf files:");
|
|
|
|
|
// for (const auto& ks : nsgv::gBqsrArg.KNOWN_SITES_VCFS) {
|
|
|
|
|
// spdlog::info(" {}", ks);
|
|
|
|
|
// }
|
|
|
|
|
|
2025-11-23 23:03:37 +08:00
|
|
|
nsgv::gBqsrArg.INDEX_FORMAT =
|
|
|
|
|
program.get("--index-format") == "BAI" ? nsbqsr::IndexFormat::BAI : nsbqsr::IndexFormat::CSI;
|
|
|
|
|
|
|
|
|
|
} catch (const std::exception &err) {
|
|
|
|
|
spdlog::error(err.what());
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
spdlog::info("fast base recalibration phase-1 start");
|
|
|
|
|
BaseRecalibrator();
|
|
|
|
|
spdlog::info("fast base recalibration phase-1 end");
|
|
|
|
|
|
|
|
|
|
DisplayProfiling(1);
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-04 22:26:13 +08:00
|
|
|
/**
 * Entry point of the "ApplyBQSR" sub-command.
 *
 * Currently a stub: both arguments are ignored and 0 is always returned.
 *
 * @param argc number of entries in argv (unused).
 * @param argv argument vector (unused).
 * @return always 0.
 */
int main_ApplyBQSR(int argc, char* argv[]) {
    (void)argc;
    (void)argv;
    const int exit_code = 0;
    return exit_code;
}
|
2025-11-23 23:03:37 +08:00
|
|
|
|
|
|
|
|
int main(int argc, char *argv[]) {
|
|
|
|
|
// init log
|
2025-12-04 22:26:13 +08:00
|
|
|
spdlog::set_default_logger(spdlog::stderr_color_st("fastbqsr"));
|
2025-11-23 23:03:37 +08:00
|
|
|
spdlog::cfg::load_env_levels();
|
|
|
|
|
|
2025-12-04 22:26:13 +08:00
|
|
|
string bqsr_prog = argv[1];
|
2025-11-23 23:03:37 +08:00
|
|
|
if (bqsr_prog == "BaseRecalibrator") {
|
|
|
|
|
return main_BaseRecalibrator(argc - 1, argv + 1);
|
|
|
|
|
} else if (bqsr_prog == "ApplyBQSR") {
|
|
|
|
|
return main_ApplyBQSR(argc - 1, argv + 1);
|
|
|
|
|
} else {
|
|
|
|
|
spdlog::error("unknown program name: {}, should be BaseRecalibrator or ApplyBQSR", bqsr_prog);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
}
|