r115: added -I and -S to bwasw
This commit is contained in:
parent
4f61e2b7f5
commit
cd818687ac
5
bwtsw2.h
5
bwtsw2.h
|
|
@ -12,8 +12,9 @@
|
||||||
#define BSW2_FLAG_RESCUED 0x800
|
#define BSW2_FLAG_RESCUED 0x800
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int a, b, q, r, t, qr, bw;
|
int skip_sw:16, hard_clip:16;
|
||||||
int z, is, t_seeds, hard_clip, multi_2nd;
|
int a, b, q, r, t, qr, bw, max_ins;
|
||||||
|
int z, is, t_seeds, multi_2nd;
|
||||||
float mask_level, coef;
|
float mask_level, coef;
|
||||||
int n_threads, chunk_size;
|
int n_threads, chunk_size;
|
||||||
} bsw2opt_t;
|
} bsw2opt_t;
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,8 @@ bsw2opt_t *bsw2_init_opt()
|
||||||
bsw2opt_t *o = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t));
|
bsw2opt_t *o = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t));
|
||||||
o->a = 1; o->b = 3; o->q = 5; o->r = 2; o->t = 30;
|
o->a = 1; o->b = 3; o->q = 5; o->r = 2; o->t = 30;
|
||||||
o->bw = 50;
|
o->bw = 50;
|
||||||
o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0;
|
o->max_ins = 20000;
|
||||||
|
o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0; o->skip_sw = 0;
|
||||||
o->mask_level = 0.50f; o->coef = 5.5f;
|
o->mask_level = 0.50f; o->coef = 5.5f;
|
||||||
o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;
|
o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;
|
||||||
return o;
|
return o;
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ int bwa_bwtsw2(int argc, char *argv[])
|
||||||
|
|
||||||
opt = bsw2_init_opt();
|
opt = bsw2_init_opt();
|
||||||
srand48(11);
|
srand48(11);
|
||||||
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:M")) >= 0) {
|
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:S")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'q': opt->q = atoi(optarg); break;
|
case 'q': opt->q = atoi(optarg); break;
|
||||||
case 'r': opt->r = atoi(optarg); break;
|
case 'r': opt->r = atoi(optarg); break;
|
||||||
|
|
@ -35,6 +35,8 @@ int bwa_bwtsw2(int argc, char *argv[])
|
||||||
case 'M': opt->multi_2nd = 1; break;
|
case 'M': opt->multi_2nd = 1; break;
|
||||||
case 'H': opt->hard_clip = 1; break;
|
case 'H': opt->hard_clip = 1; break;
|
||||||
case 'f': xreopen(optarg, "w", stdout); break;
|
case 'f': xreopen(optarg, "w", stdout); break;
|
||||||
|
case 'I': opt->max_ins = atoi(optarg); break;
|
||||||
|
case 'S': opt->skip_sw = 1; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
opt->qr = opt->q + opt->r;
|
opt->qr = opt->q + opt->r;
|
||||||
|
|
@ -50,9 +52,11 @@ int bwa_bwtsw2(int argc, char *argv[])
|
||||||
fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);
|
fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
||||||
fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
|
fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
|
||||||
fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
|
fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
|
||||||
fprintf(stderr, " -M mark multi-part alignments as secondary\n");
|
fprintf(stderr, " -M mark multi-part alignments as secondary\n");
|
||||||
|
fprintf(stderr, " -S skip Smith-Waterman read pairing\n");
|
||||||
|
fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t);
|
fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t);
|
||||||
fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
|
fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@
|
||||||
#include "stdaln.h"
|
#include "stdaln.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MAX_INS 20000
|
|
||||||
#define MIN_RATIO 0.8
|
#define MIN_RATIO 0.8
|
||||||
#define OUTLIER_BOUND 2.0
|
#define OUTLIER_BOUND 2.0
|
||||||
#define MAX_STDDEV 4.0
|
#define MAX_STDDEV 4.0
|
||||||
|
|
@ -23,7 +22,7 @@ typedef struct {
|
||||||
double avg, std;
|
double avg, std;
|
||||||
} bsw2pestat_t;
|
} bsw2pestat_t;
|
||||||
|
|
||||||
bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg)
|
bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
||||||
{
|
{
|
||||||
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
|
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
|
||||||
int i, k, x, p25, p50, p75, tmp, max_len = 0;
|
int i, k, x, p25, p50, p75, tmp, max_len = 0;
|
||||||
|
|
@ -40,6 +39,7 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg)
|
||||||
if (t[0]->G2 > 0.8 * t[0]->G) continue; // the best hit is not good enough
|
if (t[0]->G2 > 0.8 * t[0]->G) continue; // the best hit is not good enough
|
||||||
if (t[1]->G2 > 0.8 * t[1]->G) continue; // the best hit is not good enough
|
if (t[1]->G2 > 0.8 * t[1]->G) continue; // the best hit is not good enough
|
||||||
l = t[0]->k > t[1]->k? t[0]->k - t[1]->k + t[1]->len : t[1]->k - t[0]->k + t[0]->len;
|
l = t[0]->k > t[1]->k? t[0]->k - t[1]->k + t[1]->len : t[1]->k - t[0]->k + t[0]->len;
|
||||||
|
if (l >= max_ins) continue; // skip pairs with excessively large insert
|
||||||
max_len = max_len > t[0]->end - t[0]->beg? max_len : t[0]->end - t[0]->beg;
|
max_len = max_len > t[0]->end - t[0]->beg? max_len : t[0]->end - t[0]->beg;
|
||||||
max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
|
max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
|
||||||
isize[k++] = l;
|
isize[k++] = l;
|
||||||
|
|
@ -186,7 +186,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
|
||||||
int8_t g_mat[25];
|
int8_t g_mat[25];
|
||||||
kstring_t msg;
|
kstring_t msg;
|
||||||
memset(&msg, 0, sizeof(kstring_t));
|
memset(&msg, 0, sizeof(kstring_t));
|
||||||
pes = bsw2_stat(n, hits, &msg);
|
pes = bsw2_stat(n, hits, &msg, opt->max_ins);
|
||||||
for (i = k = 0; i < 5; ++i) {
|
for (i = k = 0; i < 5; ++i) {
|
||||||
for (j = 0; j < 4; ++j)
|
for (j = 0; j < 4; ++j)
|
||||||
g_mat[k++] = i == j? opt->a : -opt->b;
|
g_mat[k++] = i == j? opt->a : -opt->b;
|
||||||
|
|
@ -207,8 +207,10 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
|
||||||
if (hits[i] == 0 || hits[i+1] == 0) continue; // one end has excessive N
|
if (hits[i] == 0 || hits[i+1] == 0) continue; // one end has excessive N
|
||||||
if (hits[i]->n != 1 && hits[i+1]->n != 1) continue; // no end has exactly one hit
|
if (hits[i]->n != 1 && hits[i+1]->n != 1) continue; // no end has exactly one hit
|
||||||
if (hits[i]->n > 1 || hits[i+1]->n > 1) continue; // one read has more than one hit
|
if (hits[i]->n > 1 || hits[i+1]->n > 1) continue; // one read has more than one hit
|
||||||
if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
|
if (!opt->skip_sw) {
|
||||||
if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
|
if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
|
||||||
|
if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
|
||||||
|
} // else a[0].G == a[1].G == a[0].G2 == a[1].G2 == 0
|
||||||
// the following enumerate all possibilities. It is tedious but necessary...
|
// the following enumerate all possibilities. It is tedious but necessary...
|
||||||
if (hits[i]->n + hits[i+1]->n == 1) { // one end mapped; the other not;
|
if (hits[i]->n + hits[i+1]->n == 1) { // one end mapped; the other not;
|
||||||
bwtsw2_t *p[2];
|
bwtsw2_t *p[2];
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue