Merge pull request #139 from jmarshall/fixes
Various collected fixes (updated versions of several existing PRs)
This commit is contained in:
commit
0976272d99
6
bntseq.c
6
bntseq.c
|
|
@ -299,9 +299,9 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only)
|
|||
// read sequences
|
||||
while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q);
|
||||
if (!for_only) { // add the reverse complemented sequence
|
||||
m_pac = (bns->l_pac * 2 + 3) / 4 * 4;
|
||||
pac = realloc(pac, m_pac/4);
|
||||
memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4);
|
||||
int64_t ll_pac = (bns->l_pac * 2 + 3) / 4 * 4;
|
||||
if (ll_pac > m_pac) pac = realloc(pac, ll_pac/4);
|
||||
memset(pac + (bns->l_pac+3)/4, 0, (ll_pac - (bns->l_pac+3)/4*4) / 4);
|
||||
for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac)
|
||||
_set_pac(pac, bns->l_pac, 3-_get_pac(pac, l));
|
||||
}
|
||||
|
|
|
|||
8
bwa.1
8
bwa.1
|
|
@ -277,6 +277,14 @@ If ARG starts with @, it is interpreted as a string and gets inserted into the
|
|||
output SAM header; otherwise, ARG is interpreted as a file with all lines
|
||||
starting with @ in the file inserted into the SAM header. [null]
|
||||
.TP
|
||||
.BI -o \ FILE
|
||||
Write the output SAM file to
|
||||
.IR FILE .
|
||||
For compatibility with other BWA commands, this option may also be given as
|
||||
.B -f
|
||||
.IR FILE .
|
||||
[standard ouptut]
|
||||
.TP
|
||||
.BI -T \ INT
|
||||
Don't output alignment with score lower than
|
||||
.IR INT .
|
||||
|
|
|
|||
24
bwa.c
24
bwa.c
|
|
@ -30,13 +30,23 @@ static inline void trim_readno(kstring_t *s)
|
|||
s->l -= 2, s->s[s->l] = 0;
|
||||
}
|
||||
|
||||
static inline char *dupkstring(const kstring_t *str, int dupempty)
|
||||
{
|
||||
char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL;
|
||||
if (!s) return NULL;
|
||||
|
||||
memcpy(s, str->s, str->l);
|
||||
s[str->l] = '\0';
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
|
||||
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
|
||||
s->name = strdup(ks->name.s);
|
||||
s->comment = ks->comment.l? strdup(ks->comment.s) : 0;
|
||||
s->seq = strdup(ks->seq.s);
|
||||
s->qual = ks->qual.l? strdup(ks->qual.s) : 0;
|
||||
s->l_seq = strlen(s->seq);
|
||||
s->name = dupkstring(&ks->name, 1);
|
||||
s->comment = dupkstring(&ks->comment, 0);
|
||||
s->seq = dupkstring(&ks->seq, 1);
|
||||
s->qual = dupkstring(&ks->qual, 0);
|
||||
s->l_seq = ks->seq.l;
|
||||
}
|
||||
|
||||
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
||||
|
|
@ -144,9 +154,9 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins,
|
|||
max_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.);
|
||||
max_gap = max_ins > max_del? max_ins : max_del;
|
||||
max_gap = max_gap > 1? max_gap : 1;
|
||||
w = (max_gap + abs(rlen - l_query) + 1) >> 1;
|
||||
w = (max_gap + abs((int)rlen - l_query) + 1) >> 1;
|
||||
w = w < w_? w : w_;
|
||||
min_w = abs(rlen - l_query) + 3;
|
||||
min_w = abs((int)rlen - l_query) + 3;
|
||||
w = w > min_w? w : min_w;
|
||||
// NW alignment
|
||||
if (bwa_verbose >= 4) {
|
||||
|
|
|
|||
5
bwape.c
5
bwape.c
|
|
@ -624,7 +624,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs,
|
|||
void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt, const char *rg_line)
|
||||
{
|
||||
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
|
||||
int i, j, n_seqs, tot_seqs = 0;
|
||||
int i, j, n_seqs;
|
||||
long long tot_seqs = 0;
|
||||
bwa_seq_t *seqs[2];
|
||||
bwa_seqio_t *ks[2];
|
||||
clock_t t;
|
||||
|
|
@ -711,7 +712,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f
|
|||
|
||||
for (j = 0; j < 2; ++j)
|
||||
bwa_free_read_seq(n_seqs, seqs[j]);
|
||||
fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs);
|
||||
fprintf(stderr, "[bwa_sai2sam_pe_core] %lld sequences have been processed.\n", tot_seqs);
|
||||
last_ii = ii;
|
||||
}
|
||||
|
||||
|
|
|
|||
9
bwase.c
9
bwase.c
|
|
@ -173,13 +173,15 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
|
|||
ubyte_t *rseq;
|
||||
int64_t k, rb, re, rlen;
|
||||
int8_t mat[25];
|
||||
int w;
|
||||
|
||||
bwa_fill_scmat(1, 3, mat);
|
||||
rb = *_rb; re = rb + len + ref_shift;
|
||||
assert(re <= l_pac);
|
||||
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
|
||||
assert(re - rb == rlen);
|
||||
ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > abs(rlen - len) * 1.5? SW_BW : abs(rlen - len) * 1.5, n_cigar, &cigar32);
|
||||
w = abs((int)rlen - len) * 1.5;
|
||||
ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > w? SW_BW : w, n_cigar, &cigar32);
|
||||
assert(*n_cigar > 0);
|
||||
if ((cigar32[*n_cigar - 1]&0xf) == 1) cigar32[*n_cigar - 1] = (cigar32[*n_cigar - 1]>>4<<4) | 3; // change endding ins to soft clipping
|
||||
if ((cigar32[0]&0xf) == 1) cigar32[0] = (cigar32[0]>>4<<4) | 3; // change beginning ins to soft clipping
|
||||
|
|
@ -505,7 +507,8 @@ void bwase_initialize()
|
|||
void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ, const char *rg_line)
|
||||
{
|
||||
extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa);
|
||||
int i, n_seqs, tot_seqs = 0, m_aln;
|
||||
int i, n_seqs, m_aln;
|
||||
long long tot_seqs = 0;
|
||||
bwt_aln1_t *aln = 0;
|
||||
bwa_seq_t *seqs;
|
||||
bwa_seqio_t *ks;
|
||||
|
|
@ -563,7 +566,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
|
|||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
|
||||
bwa_free_read_seq(n_seqs, seqs);
|
||||
fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
|
||||
fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs);
|
||||
}
|
||||
|
||||
// destroy
|
||||
|
|
|
|||
|
|
@ -338,6 +338,7 @@ BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable)
|
|||
bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int));
|
||||
GenerateDNAOccCountTable(bwt->decodeTable);
|
||||
} else {
|
||||
// FIXME Prevent BWTFree() from freeing decodeTable in this case
|
||||
bwt->decodeTable = decodeTable;
|
||||
}
|
||||
|
||||
|
|
@ -1538,6 +1539,8 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB
|
|||
(long)bwtInc->numberOfIterationDone, (long)processedTextLength);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(packedFile);
|
||||
return bwtInc;
|
||||
}
|
||||
|
||||
|
|
@ -1545,8 +1548,6 @@ void BWTFree(BWT *bwt)
|
|||
{
|
||||
if (bwt == 0) return;
|
||||
free(bwt->cumulativeFreq);
|
||||
free(bwt->bwtCode);
|
||||
free(bwt->occValue);
|
||||
free(bwt->occValueMajor);
|
||||
free(bwt->decodeTable);
|
||||
free(bwt);
|
||||
|
|
@ -1555,8 +1556,10 @@ void BWTFree(BWT *bwt)
|
|||
void BWTIncFree(BWTInc *bwtInc)
|
||||
{
|
||||
if (bwtInc == 0) return;
|
||||
free(bwtInc->bwt);
|
||||
BWTFree(bwtInc->bwt);
|
||||
free(bwtInc->workingMemory);
|
||||
free(bwtInc->cumulativeCountInCurrentBuild);
|
||||
free(bwtInc->packedShift);
|
||||
free(bwtInc);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq)
|
|||
}
|
||||
{ // generate cnt_table
|
||||
for (i = 0; i != 256; ++i) {
|
||||
u_int32_t j, x = 0;
|
||||
uint32_t j, x = 0;
|
||||
for (j = 0; j != 4; ++j)
|
||||
x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3);
|
||||
b->cnt_table[i] = x;
|
||||
|
|
|
|||
5
bwtaln.c
5
bwtaln.c
|
|
@ -158,7 +158,8 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)
|
|||
|
||||
void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
|
||||
{
|
||||
int i, n_seqs, tot_seqs = 0;
|
||||
int i, n_seqs;
|
||||
long long tot_seqs = 0;
|
||||
bwa_seq_t *seqs;
|
||||
bwa_seqio_t *ks;
|
||||
clock_t t;
|
||||
|
|
@ -218,7 +219,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
|
|||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
|
||||
bwa_free_read_seq(n_seqs, seqs);
|
||||
fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
|
||||
fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs);
|
||||
}
|
||||
|
||||
// destroy
|
||||
|
|
|
|||
2
bwtgap.c
2
bwtgap.c
|
|
@ -58,7 +58,7 @@ static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, i
|
|||
q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries);
|
||||
}
|
||||
p = q->stack + q->n_entries;
|
||||
p->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l;
|
||||
p->info = (uint32_t)score<<21 | i; p->k = k; p->l = l;
|
||||
p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape;
|
||||
p->n_ins = n_ins; p->n_del = n_del;
|
||||
p->state = state;
|
||||
|
|
|
|||
6
bwtgap.h
6
bwtgap.h
|
|
@ -5,9 +5,9 @@
|
|||
#include "bwtaln.h"
|
||||
|
||||
typedef struct { // recursion stack
|
||||
u_int32_t info; // score<<21 | i
|
||||
u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
|
||||
u_int32_t n_ins:16, n_del:16;
|
||||
uint32_t info; // score<<21 | i
|
||||
uint32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6;
|
||||
uint32_t n_ins:16, n_del:16;
|
||||
int last_diff_pos;
|
||||
bwtint_t k, l; // (k,l) is the SA region of [i,n-1]
|
||||
} gap_entry_t;
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
|
|||
}
|
||||
rope_destroy(r);
|
||||
}
|
||||
bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);
|
||||
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
|
||||
for (i = 0; i < bwt->seq_len; ++i)
|
||||
bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);
|
||||
free(buf);
|
||||
|
|
@ -175,7 +175,7 @@ void bwt_bwtupdate_core(bwt_t *bwt)
|
|||
int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command
|
||||
{
|
||||
bwt_t *bwt;
|
||||
if (argc < 2) {
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: bwa bwtupdate <the.bwt>\n");
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b
|
|||
double diff;
|
||||
G[0] = hits[i]->hits[0].G + a[1].G;
|
||||
G[1] = hits[i+1]->hits[0].G + a[0].G;
|
||||
diff = fabs(G[0] - G[1]) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.);
|
||||
diff = fabs((double)(G[0] - G[1])) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.);
|
||||
if (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0;
|
||||
}
|
||||
if (a[0].G == 0 || a[1].G == 0) { // one proper pair only
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[])
|
|||
|
||||
aux.opt = opt = mem_opt_init();
|
||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||
while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||
else if (c == '1') no_mt_io = 1;
|
||||
else if (c == 'x') mode = optarg;
|
||||
|
|
@ -158,6 +158,7 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1;
|
||||
else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1;
|
||||
else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
|
||||
else if (c == 'o' || c == 'f') xreopen(optarg, "wb", stdout);
|
||||
else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
|
||||
else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1;
|
||||
else if (c == 'C') aux.copy_comment = 1;
|
||||
|
|
@ -257,7 +258,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, " -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d]\n", opt->e_del, opt->e_ins);
|
||||
fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3);
|
||||
fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired);
|
||||
fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n");
|
||||
fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overridden [null]\n");
|
||||
fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n");
|
||||
fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n");
|
||||
fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n");
|
||||
|
|
@ -265,6 +266,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, " -p smart pairing (ignoring in2.fq)\n");
|
||||
fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
|
||||
fprintf(stderr, " -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]\n");
|
||||
fprintf(stderr, " -o FILE sam file to output results to [stdout]\n");
|
||||
fprintf(stderr, " -j treat ALT contigs as part of the primary assembly (i.e. ignore <idxbase>.alt file)\n");
|
||||
fprintf(stderr, " -5 always take the leftmost alignment on a read as primary\n");
|
||||
fprintf(stderr, "\n");
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ void *wrap_calloc(size_t nmemb, size_t size,
|
|||
void *p = calloc(nmemb, size);
|
||||
if (NULL == p) {
|
||||
fprintf(stderr,
|
||||
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
|
||||
func, nmemb * size, file, line, strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
|
@ -25,7 +25,7 @@ void *wrap_malloc(size_t size,
|
|||
void *p = malloc(size);
|
||||
if (NULL == p) {
|
||||
fprintf(stderr,
|
||||
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
|
||||
func, size, file, line, strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
|
@ -37,7 +37,7 @@ void *wrap_realloc(void *ptr, size_t size,
|
|||
void *p = realloc(ptr, size);
|
||||
if (NULL == p) {
|
||||
fprintf(stderr,
|
||||
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
|
||||
func, size, file, line, strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
|
@ -49,7 +49,7 @@ char *wrap_strdup(const char *s,
|
|||
char *p = strdup(s);
|
||||
if (NULL == p) {
|
||||
fprintf(stderr,
|
||||
"[%s] Failed to allocate %zd bytes at %s line %u: %s\n",
|
||||
"[%s] Failed to allocate %zu bytes at %s line %u: %s\n",
|
||||
func, strlen(s), file, line, strerror(errno));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue