Merge branch 'master' into master_fixes
This commit is contained in:
commit
4cb5110d03
4
bwa.1
4
bwa.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH bwa 1 "15 March 2013" "bwa-0.7.3a" "Bioinformatics tools"
|
||||
.TH bwa 1 "15 March 2013" "bwa-0.7.4" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
bwa - Burrows-Wheeler Alignment Tool
|
||||
|
|
@ -220,7 +220,7 @@ deducted. [5]
|
|||
Penalty for an unpaired read pair. BWA-MEM scores an unpaired read pair as
|
||||
.RI scoreRead1+scoreRead2- INT
|
||||
and scores a paired read pair as scoreRead1+scoreRead2-insertPenalty. It compares these
|
||||
two scores to determine whether we should force pairing. [9]
|
||||
two scores to determine whether we should force pairing. [17]
|
||||
.TP
|
||||
.B -p
|
||||
Assume the first input query file is interleaved paired-end FASTA/Q. See the command description for details.
|
||||
|
|
|
|||
2
bwamem.c
2
bwamem.c
|
|
@ -46,7 +46,7 @@ mem_opt_t *mem_opt_init()
|
|||
o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 100;
|
||||
o->T = 30;
|
||||
o->zdrop = 100;
|
||||
o->pen_unpaired = 9;
|
||||
o->pen_unpaired = 17;
|
||||
o->pen_clip = 5;
|
||||
o->min_seed_len = 19;
|
||||
o->split_width = 10;
|
||||
|
|
|
|||
11
bwape.c
11
bwape.c
|
|
@ -106,6 +106,11 @@ static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double
|
|||
tmp = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);
|
||||
ii->low = tmp > max_len? tmp : max_len; // ii->low is unsigned
|
||||
ii->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
|
||||
if (ii->low > ii->high) {
|
||||
fprintf(stderr, "[infer_isize] fail to infer insert size: upper bound is smaller than read length\n");
|
||||
free(isizes);
|
||||
return -1;
|
||||
}
|
||||
for (i = 0, x = n = 0; i < tot; ++i)
|
||||
if (isizes[i] >= ii->low && isizes[i] <= ii->high)
|
||||
++n, x += isizes[i];
|
||||
|
|
@ -404,7 +409,7 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
|||
bwa_cigar_t *cigar = 0;
|
||||
ubyte_t *ref_seq;
|
||||
bwtint_t k, x, y, l;
|
||||
int xtra;
|
||||
int xtra, gscore;
|
||||
int8_t mat[25];
|
||||
|
||||
bwa_fill_scmat(1, 3, mat);
|
||||
|
|
@ -422,12 +427,12 @@ bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const u
|
|||
// do alignment
|
||||
xtra = KSW_XSUBO | KSW_XSTART | (len < 250? KSW_XBYTE : 0);
|
||||
r = ksw_align(len, (uint8_t*)seq, l, ref_seq, 5, mat, 5, 1, xtra, 0);
|
||||
ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
|
||||
gscore = ksw_global(r.qe - r.qb + 1, &seq[r.qb], r.te - r.tb + 1, &ref_seq[r.tb], 5, mat, 5, 1, 50, n_cigar, &cigar32);
|
||||
cigar = (bwa_cigar_t*)cigar32;
|
||||
for (k = 0; k < *n_cigar; ++k)
|
||||
cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
|
||||
|
||||
if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score) { // poor hit or tandem hits
|
||||
if (r.score < SW_MIN_MATCH_LEN || r.score2 == r.score || gscore != r.score) { // poor hit or tandem hits or weird alignment
|
||||
free(cigar); free(ref_seq); *n_cigar = 0;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
|||
p75 = isize[(int)(.75 * k + .499)];
|
||||
ksprintf(msg, "[%s] infer the insert size distribution from %d high-quality pairs.\n", __func__, k);
|
||||
if (k < 8) {
|
||||
ksprintf(msg, "[%s] fail to infer the insert size distribution.\n", __func__);
|
||||
ksprintf(msg, "[%s] fail to infer the insert size distribution: too few good pairs.\n", __func__);
|
||||
free(isize);
|
||||
r.failed = 1;
|
||||
return r;
|
||||
|
|
@ -56,6 +56,12 @@ bsw2pestat_t bsw2_stat(int n, bwtsw2_t **buf, kstring_t *msg, int max_ins)
|
|||
r.low = tmp > max_len? tmp : max_len;
|
||||
if (r.low < 1) r.low = 1;
|
||||
r.high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
|
||||
if (r.low > r.high) {
|
||||
ksprintf(msg, "[%s] fail to infer the insert size distribution: upper bound is smaller than max read length.\n", __func__);
|
||||
free(isize);
|
||||
r.failed = 1;
|
||||
return r;
|
||||
}
|
||||
ksprintf(msg, "[%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75);
|
||||
ksprintf(msg, "[%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r.low, r.high);
|
||||
for (i = x = 0, r.avg = 0; i < k; ++i)
|
||||
|
|
|
|||
6
ksw.c
6
ksw.c
|
|
@ -202,10 +202,11 @@ end_loop16:
|
|||
r.score = gmax + q->shift < 255? gmax : 255;
|
||||
r.te = te;
|
||||
if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
|
||||
int max = -1, low, high, qlen = slen * 16;
|
||||
int max = -1, tmp, low, high, qlen = slen * 16;
|
||||
uint8_t *t = (uint8_t*)Hmax;
|
||||
for (i = 0; i < qlen; ++i, ++t)
|
||||
if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
|
||||
else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) r.qe = tmp;
|
||||
//printf("%d,%d\n", max, gmax);
|
||||
if (b) {
|
||||
i = (r.score + q->max - 1) / q->max;
|
||||
|
|
@ -303,10 +304,11 @@ end_loop8:
|
|||
}
|
||||
r.score = gmax; r.te = te;
|
||||
{
|
||||
int max = -1, low, high, qlen = slen * 8;
|
||||
int max = -1, tmp, low, high, qlen = slen * 8;
|
||||
uint16_t *t = (uint16_t*)Hmax;
|
||||
for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
|
||||
if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
|
||||
else if ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) r.qe = tmp;
|
||||
if (b) {
|
||||
i = (r.score + q->max - 1) / q->max;
|
||||
low = te - i; high = te + i;
|
||||
|
|
|
|||
Loading…
Reference in New Issue