r115: added -I and -S to bwasw
This commit is contained in:
parent
4f61e2b7f5
commit
cd818687ac
5
bwtsw2.h
5
bwtsw2.h
|
|
@ -12,8 +12,9 @@
|
|||
#define BSW2_FLAG_RESCUED 0x800
|
||||
|
||||
typedef struct {
|
||||
int a, b, q, r, t, qr, bw;
|
||||
int z, is, t_seeds, hard_clip, multi_2nd;
|
||||
int skip_sw:16, hard_clip:16;
|
||||
int a, b, q, r, t, qr, bw, max_ins;
|
||||
int z, is, t_seeds, multi_2nd;
|
||||
float mask_level, coef;
|
||||
int n_threads, chunk_size;
|
||||
} bsw2opt_t;
|
||||
|
|
|
|||
|
|
@ -50,7 +50,8 @@ bsw2opt_t *bsw2_init_opt()
|
|||
bsw2opt_t *o = (bsw2opt_t*)calloc(1, sizeof(bsw2opt_t));
|
||||
o->a = 1; o->b = 3; o->q = 5; o->r = 2; o->t = 30;
|
||||
o->bw = 50;
|
||||
o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0;
|
||||
o->max_ins = 20000;
|
||||
o->z = 1; o->is = 3; o->t_seeds = 5; o->hard_clip = 0; o->skip_sw = 0;
|
||||
o->mask_level = 0.50f; o->coef = 5.5f;
|
||||
o->qr = o->q + o->r; o->n_threads = 1; o->chunk_size = 10000000;
|
||||
return o;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
|
||||
opt = bsw2_init_opt();
|
||||
srand48(11);
|
||||
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:M")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "q:r:a:b:t:T:w:d:z:m:s:c:N:Hf:MI:S")) >= 0) {
|
||||
switch (c) {
|
||||
case 'q': opt->q = atoi(optarg); break;
|
||||
case 'r': opt->r = atoi(optarg); break;
|
||||
|
|
@ -35,6 +35,8 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
case 'M': opt->multi_2nd = 1; break;
|
||||
case 'H': opt->hard_clip = 1; break;
|
||||
case 'f': xreopen(optarg, "w", stdout); break;
|
||||
case 'I': opt->max_ins = atoi(optarg); break;
|
||||
case 'S': opt->skip_sw = 1; break;
|
||||
}
|
||||
}
|
||||
opt->qr = opt->q + opt->r;
|
||||
|
|
@ -50,9 +52,11 @@ int bwa_bwtsw2(int argc, char *argv[])
|
|||
fprintf(stderr, " -m FLOAT mask level [%.2f]\n", opt->mask_level);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
||||
fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
|
||||
fprintf(stderr, " -f FILE file to output results to instead of stdout\n");
|
||||
fprintf(stderr, " -H in SAM output, use hard clipping instead of soft clipping\n");
|
||||
fprintf(stderr, " -M mark multi-part alignments as secondary\n");
|
||||
fprintf(stderr, " -S skip Smith-Waterman read pairing\n");
|
||||
fprintf(stderr, " -I INT ignore pairs with insert >=INT for inferring the size distr [%d]\n", opt->max_ins);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " -T INT score threshold divided by a [%d]\n", opt->t);
|
||||
fprintf(stderr, " -c FLOAT coefficient of length-threshold adjustment [%.1f]\n", opt->coef);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@
|
|||
#include "stdaln.h"
|
||||
#endif
|
||||
|
||||
#define MAX_INS 20000
|
||||
#define MIN_RATIO 0.8
|
||||
#define OUTLIER_BOUND 2.0
|
||||
#define MAX_STDDEV 4.0
|
||||
|
|
@ -23,7 +22,7 @@ typedef struct {
|
|||
double avg, std;
|
||||
} bsw2pestat_t;
|
||||
|
||||
bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg)
|
||||
bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
||||
{
|
||||
extern void ks_introsort_uint64_t(size_t n, uint64_t *a);
|
||||
int i, k, x, p25, p50, p75, tmp, max_len = 0;
|
||||
|
|
@ -40,6 +39,7 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg)
|
|||
if (t[0]->G2 > 0.8 * t[0]->G) continue; // the best hit is not good enough
|
||||
if (t[1]->G2 > 0.8 * t[1]->G) continue; // the best hit is not good enough
|
||||
l = t[0]->k > t[1]->k? t[0]->k - t[1]->k + t[1]->len : t[1]->k - t[0]->k + t[0]->len;
|
||||
if (l >= max_ins) continue; // skip pairs with excessively large insert
|
||||
max_len = max_len > t[0]->end - t[0]->beg? max_len : t[0]->end - t[0]->beg;
|
||||
max_len = max_len > t[1]->end - t[1]->beg? max_len : t[1]->end - t[1]->beg;
|
||||
isize[k++] = l;
|
||||
|
|
@ -186,7 +186,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
|
|||
int8_t g_mat[25];
|
||||
kstring_t msg;
|
||||
memset(&msg, 0, sizeof(kstring_t));
|
||||
pes = bsw2_stat(n, hits, &msg);
|
||||
pes = bsw2_stat(n, hits, &msg, opt->max_ins);
|
||||
for (i = k = 0; i < 5; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
g_mat[k++] = i == j? opt->a : -opt->b;
|
||||
|
|
@ -207,8 +207,10 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
|
|||
if (hits[i] == 0 || hits[i+1] == 0) continue; // one end has excessive N
|
||||
if (hits[i]->n != 1 && hits[i+1]->n != 1) continue; // no end has exactly one hit
|
||||
if (hits[i]->n > 1 || hits[i+1]->n > 1) continue; // one read has more than one hit
|
||||
if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
|
||||
if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
|
||||
if (!opt->skip_sw) {
|
||||
if (hits[i+0]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+0]->hits[0], seq[i+1].l, seq[i+1].seq, &a[1], g_mat);
|
||||
if (hits[i+1]->n == 1) bsw2_pair1(opt, l_pac, pac, &pes, &hits[i+1]->hits[0], seq[i+0].l, seq[i+0].seq, &a[0], g_mat);
|
||||
} // else a[0].G == a[1].G == a[0].G2 == a[1].G2 == 0
|
||||
// the following enumerate all possibilities. It is tedious but necessary...
|
||||
if (hits[i]->n + hits[i+1]->n == 1) { // one end mapped; the other not;
|
||||
bwtsw2_t *p[2];
|
||||
|
|
|
|||
Loading…
Reference in New Issue