From af87825525e3eb203e2dd8a993c02cbfc8c777ad Mon Sep 17 00:00:00 2001 From: John Marshall Date: Tue, 25 Apr 2017 13:51:52 +0100 Subject: [PATCH 01/11] Replace u_int32_t by the more common uint32_t The u_int32_t type comes from , which is often included as a byproduct of other headers on glibc platforms, but not on FreeBSD or Alpine Linux. Use 's uint32_t instead, which is used elsewhere in bwa source code. --- bwt_lite.c | 2 +- bwtgap.c | 2 +- bwtgap.h | 6 +++--- bwtindex.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bwt_lite.c b/bwt_lite.c index f7946f5..eb16da6 100644 --- a/bwt_lite.c +++ b/bwt_lite.c @@ -48,7 +48,7 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq) } { // generate cnt_table for (i = 0; i != 256; ++i) { - u_int32_t j, x = 0; + uint32_t j, x = 0; for (j = 0; j != 4; ++j) x |= (((i&3) == j) + ((i>>2&3) == j) + ((i>>4&3) == j) + (i>>6 == j)) << (j<<3); b->cnt_table[i] = x; diff --git a/bwtgap.c b/bwtgap.c index 08bc1f4..7dde443 100644 --- a/bwtgap.c +++ b/bwtgap.c @@ -58,7 +58,7 @@ static inline void gap_push(gap_stack_t *stack, int i, bwtint_t k, bwtint_t l, i q->stack = (gap_entry_t*)realloc(q->stack, sizeof(gap_entry_t) * q->m_entries); } p = q->stack + q->n_entries; - p->info = (u_int32_t)score<<21 | i; p->k = k; p->l = l; + p->info = (uint32_t)score<<21 | i; p->k = k; p->l = l; p->n_mm = n_mm; p->n_gapo = n_gapo; p->n_gape = n_gape; p->n_ins = n_ins; p->n_del = n_del; p->state = state; diff --git a/bwtgap.h b/bwtgap.h index 7dd6165..9085b62 100644 --- a/bwtgap.h +++ b/bwtgap.h @@ -5,9 +5,9 @@ #include "bwtaln.h" typedef struct { // recursion stack - u_int32_t info; // score<<21 | i - u_int32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6; - u_int32_t n_ins:16, n_del:16; + uint32_t info; // score<<21 | i + uint32_t n_mm:8, n_gapo:8, n_gape:8, state:2, n_seed_mm:6; + uint32_t n_ins:16, n_del:16; int last_diff_pos; bwtint_t k, l; // (k,l) is the SA region of [i,n-1] } gap_entry_t; diff --git a/bwtindex.c b/bwtindex.c index fb32231..571d087 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -119,7 +119,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is) } rope_destroy(r); } - bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4); + bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); for (i = 0; i < bwt->seq_len; ++i) bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1); free(buf); From beff2e27f82c818335fba209a63149eb477e6ea3 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Tue, 25 Apr 2017 13:54:31 +0100 Subject: [PATCH 02/11] Add missing include Fixes compilation on non-glibc platforms, e.g., FreeBSD and Alpine Linux. --- kthread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kthread.c b/kthread.c index 1b13494..780de19 100644 --- a/kthread.c +++ b/kthread.c @@ -1,4 +1,5 @@ #include +#include #include #include From 1972a978133110e36af05a6bc2e27881f51443c4 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Wed, 10 May 2017 19:47:12 +0100 Subject: [PATCH 03/11] Only realloc(pac,...) if it needs to be made larger Similarly to the realloc(pac,...) within add1(), only bother to call realloc() if appending the reverse complemented sequence requires more space than is currently in the pac/m_pac buffer. Avoids realloc(pac,0) (and a "Failed to allocate 0 bytes at bntseq.c" message from wrap_realloc()) in the corner case of an empty reference FASTA file. Fixes #54. --- bntseq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bntseq.c b/bntseq.c index 465e383..65f7e93 100644 --- a/bntseq.c +++ b/bntseq.c @@ -299,9 +299,9 @@ int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only) // read sequences while (kseq_read(seq) >= 0) pac = add1(seq, bns, pac, &m_pac, &m_seqs, &m_holes, &q); if (!for_only) { // add the reverse complemented sequence - m_pac = (bns->l_pac * 2 + 3) / 4 * 4; - pac = realloc(pac, m_pac/4); - memset(pac + (bns->l_pac+3)/4, 0, (m_pac - (bns->l_pac+3)/4*4) / 4); + int64_t ll_pac = (bns->l_pac * 2 + 3) / 4 * 4; + if (ll_pac > m_pac) pac = realloc(pac, ll_pac/4); + memset(pac + (bns->l_pac+3)/4, 0, (ll_pac - (bns->l_pac+3)/4*4) / 4); for (l = bns->l_pac - 1; l >= 0; --l, ++bns->l_pac) _set_pac(pac, bns->l_pac, 3-_get_pac(pac, l)); } From 6a5caf764f4123b49fff8504291c72c372705514 Mon Sep 17 00:00:00 2001 From: Gwen Ives Date: Tue, 9 Dec 2014 10:24:46 +0100 Subject: [PATCH 04/11] Fixed BWTIncFree() memory leaks [Based on PR #37 with the additions below.] Don't free non-malloced items in BWTFree(). If BWTCreate() is ever called with a non-NULL decodeTable, BWTFree() will need to not free decodeTable -- see FIXME comment. Close packedFile in BWTIncConstructFromPacked() in the normal case. Ignore it in error cases, as they immediately call exit() anyway. --- bwt_gen.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bwt_gen.c b/bwt_gen.c index 76f28c9..5b563d0 100644 --- a/bwt_gen.c +++ b/bwt_gen.c @@ -338,6 +338,7 @@ BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable) bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int)); GenerateDNAOccCountTable(bwt->decodeTable); } else { + // FIXME Prevent BWTFree() from freeing decodeTable in this case bwt->decodeTable = decodeTable; } @@ -1538,6 +1539,8 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxB (long)bwtInc->numberOfIterationDone, (long)processedTextLength); } } + + fclose(packedFile); return bwtInc; } @@ -1545,8 +1548,6 @@ void BWTFree(BWT *bwt) { if (bwt == 0) return; free(bwt->cumulativeFreq); - free(bwt->bwtCode); - free(bwt->occValue); free(bwt->occValueMajor); free(bwt->decodeTable); free(bwt); @@ -1555,8 +1556,10 @@ void BWTFree(BWT *bwt) void BWTIncFree(BWTInc *bwtInc) { if (bwtInc == 0) return; - free(bwtInc->bwt); + BWTFree(bwtInc->bwt); free(bwtInc->workingMemory); + free(bwtInc->cumulativeCountInCurrentBuild); + free(bwtInc->packedShift); free(bwtInc); } From ab3a92bc736ae96518895275df6b656724ee6b88 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Mon, 26 Jun 2017 10:45:13 +0100 Subject: [PATCH 05/11] Prevent Clang warnings on abs() and fabs() calls In the bwa.c and bwase.c calls, rlen is an int64_t returned from bns_get_seq() and is the number of reference bases covered by the alignment; l_query/len is an int and the query length of the alignment; and the result is an int given to an int parameter of ksw_global[2](). As even the result is int and as rlen is effectively bounded by the maximum length of a reference sequence, we maintain the status quo in this code and simply cast rlen to int to silence Clang's "use llabs()" (llabs() would not be a great answer given an int64_t anyway). The bwtsw2_pair.c call needs to remain fabs() so both divisions are done in floating point; cast to double to prevent Clang suggesting changing the call to integer abs(). --- bwa.c | 4 ++-- bwase.c | 4 +++- bwtsw2_pair.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/bwa.c b/bwa.c index c78ebb6..1ca5634 100644 --- a/bwa.c +++ b/bwa.c @@ -144,9 +144,9 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, max_del = (int)((double)(((l_query+1)>>1) * mat[0] - o_del) / e_del + 1.); max_gap = max_ins > max_del? max_ins : max_del; max_gap = max_gap > 1? max_gap : 1; - w = (max_gap + abs(rlen - l_query) + 1) >> 1; + w = (max_gap + abs((int)rlen - l_query) + 1) >> 1; w = w < w_? w : w_; - min_w = abs(rlen - l_query) + 3; + min_w = abs((int)rlen - l_query) + 3; w = w > min_w? w : min_w; // NW alignment if (bwa_verbose >= 4) { diff --git a/bwase.c b/bwase.c index 77c50db..37e4274 100644 --- a/bwase.c +++ b/bwase.c @@ -173,13 +173,15 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l ubyte_t *rseq; int64_t k, rb, re, rlen; int8_t mat[25]; + int w; bwa_fill_scmat(1, 3, mat); rb = *_rb; re = rb + len + ref_shift; assert(re <= l_pac); rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen); assert(re - rb == rlen); - ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > abs(rlen - len) * 1.5? SW_BW : abs(rlen - len) * 1.5, n_cigar, &cigar32); + w = abs((int)rlen - len) * 1.5; + ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW > w? SW_BW : w, n_cigar, &cigar32); assert(*n_cigar > 0); if ((cigar32[*n_cigar - 1]&0xf) == 1) cigar32[*n_cigar - 1] = (cigar32[*n_cigar - 1]>>4<<4) | 3; // change endding ins to soft clipping if ((cigar32[0]&0xf) == 1) cigar32[0] = (cigar32[0]>>4<<4) | 3; // change beginning ins to soft clipping diff --git a/bwtsw2_pair.c b/bwtsw2_pair.c index b945128..f48dcb6 100644 --- a/bwtsw2_pair.c +++ b/bwtsw2_pair.c @@ -236,7 +236,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b double diff; G[0] = hits[i]->hits[0].G + a[1].G; G[1] = hits[i+1]->hits[0].G + a[0].G; - diff = fabs(G[0] - G[1]) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.); + diff = fabs((double)(G[0] - G[1])) / (opt->a + opt->b) / ((hits[i]->hits[0].len + a[1].len + hits[i+1]->hits[0].len + a[0].len) / 2.); if (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0; } if (a[0].G == 0 || a[1].G == 0) { // one proper pair only From d28da05f36671071440c909726a4cf43a5837bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Manuel=20Abu=C3=ADn=20Mosquera?= Date: Tue, 24 Mar 2015 10:35:46 +0100 Subject: [PATCH 06/11] Added option to write output to file --- fastmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fastmap.c b/fastmap.c index 7b7145d..fcdadd1 100644 --- a/fastmap.c +++ b/fastmap.c @@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) { + while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:f:W:x:G:h:y:K:X:H:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; @@ -158,6 +158,7 @@ int main_mem(int argc, char *argv[]) else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1; else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1; else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1; + else if (c == 'f') xreopen(optarg, "wb", stdout); else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1; else if (c == 'C') aux.copy_comment = 1; From bc58bf265d76734e650c051322485137de41a3e7 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Tue, 27 Jun 2017 17:05:44 +0100 Subject: [PATCH 07/11] Add "bwa mem -o FILE" synonym and documentation As -o is still free in the mem command, use that standard option to specify an output file (and keep -f to parallel bwase/bwape commands). Document it in the usage and man page. --- bwa.1 | 8 ++++++++ fastmap.c | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bwa.1 b/bwa.1 index 11d3854..a0b59bc 100644 --- a/bwa.1 +++ b/bwa.1 @@ -277,6 +277,14 @@ If ARG starts with @, it is interpreted as a string and gets inserted into the output SAM header; otherwise, ARG is interpreted as a file with all lines starting with @ in the file inserted into the SAM header. [null] .TP +.BI -o \ FILE +Write the output SAM file to +.IR FILE . +For compatibility with other BWA commands, this option may also be given as +.B -f +.IR FILE . +[standard ouptut] +.TP .BI -T \ INT Don't output alignment with score lower than .IR INT . diff --git a/fastmap.c b/fastmap.c index fcdadd1..c2ad90a 100644 --- a/fastmap.c +++ b/fastmap.c @@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:f:W:x:G:h:y:K:X:H:")) >= 0) { + while ((c = getopt(argc, argv, "51paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; @@ -158,7 +158,7 @@ int main_mem(int argc, char *argv[]) else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1; else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1; else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1; - else if (c == 'f') xreopen(optarg, "wb", stdout); + else if (c == 'o' || c == 'f') xreopen(optarg, "wb", stdout); else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; else if (c == 'y') opt->max_mem_intv = atol(optarg), opt0.max_mem_intv = 1; else if (c == 'C') aux.copy_comment = 1; @@ -266,6 +266,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -p smart pairing (ignoring in2.fq)\n"); fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); fprintf(stderr, " -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null]\n"); + fprintf(stderr, " -o FILE sam file to output results to [stdout]\n"); fprintf(stderr, " -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file)\n"); fprintf(stderr, " -5 always take the leftmost alignment on a read as primary\n"); fprintf(stderr, "\n"); From 5ede7eb98172ae20967223065c7bd35f0f4804ce Mon Sep 17 00:00:00 2001 From: "Rintze M. Zelle" Date: Fri, 10 Jun 2016 11:15:53 -0400 Subject: [PATCH 08/11] Update README.md Correct header formatting, fix some typos in headers --- README.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 9ac49bb..2d18696 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Build Status](https://travis-ci.org/lh3/bwa.svg?branch=dev)](https://travis-ci.org/lh3/bwa) [![Build Status](https://drone.io/github.com/lh3/bwa/status.png)](https://drone.io/github.com/lh3/bwa/latest) -##Getting started +## Getting started git clone https://github.com/lh3/bwa.git cd bwa; make @@ -8,7 +8,7 @@ ./bwa mem ref.fa read-se.fq.gz | gzip -3 > aln-se.sam.gz ./bwa mem ref.fa read1.fq read2.fq | gzip -3 > aln-pe.sam.gz -##Introduction +## Introduction BWA is a software package for mapping DNA sequences against a large reference genome, such as the human genome. It consists of three algorithms: @@ -24,7 +24,7 @@ reference genome (the **index** command). Alignment algorithms are invoked with different sub-commands: **aln/samse/sampe** for BWA-backtrack, **bwasw** for BWA-SW and **mem** for the BWA-MEM algorithm. -##Availability +## Availability BWA is released under [GPLv3][1]. The latest source code is [freely available at github][2]. Released packages can [be downloaded][3] at @@ -37,7 +37,7 @@ In addition to BWA, this self-consistent package also comes with bwa-associated and 3rd-party tools for proper BAM-to-FASTQ conversion, mapping to ALT contigs, adapter triming, duplicate marking, HLA typing and associated data files. -##Seeking helps +## Seeking help The detailed usage is described in the man page available together with the source code. You can use `man ./bwa.1` to view the man page in a terminal. The @@ -46,7 +46,7 @@ have questions about BWA, you may [sign up the mailing list][6] and then send the questions to [bio-bwa-help@sourceforge.net][7]. You may also ask questions in forums such as [BioStar][8] and [SEQanswers][9]. -##Citing BWA +## Citing BWA * Li H. and Durbin R. (2009) Fast and accurate short read alignment with Burrows-Wheeler transform. *Bioinformatics*, **25**, 1754-1760. [PMID: @@ -63,7 +63,7 @@ in forums such as [BioStar][8] and [SEQanswers][9]. Please note that the last reference is a preprint hosted at [arXiv.org][13]. I do not have plan to submit it to a peer-reviewed journal in the near future. -##Frequently asked questions (FAQs) +## Frequently asked questions (FAQs) 1. [What types of data does BWA work with?](#type) 2. [Why does a read appear multiple times in the output SAM?](#multihit) @@ -73,7 +73,7 @@ do not have plan to submit it to a peer-reviewed journal in the near future. 6. [Does BWA work with ALT contigs in the GRCh38 release?](#altctg) 7. [Can I just run BWA-MEM against GRCh38+ALT without post-processing?](#postalt) -####1. What types of data does BWA work with? +#### 1. What types of data does BWA work with? BWA works with a variety types of DNA sequence data, though the optimal algorithm and setting may vary. The following list gives the recommended @@ -108,7 +108,7 @@ errors given longer query sequences as the chance of missing all seeds is small. As is shown above, with non-default settings, BWA-MEM works with Oxford Nanopore reads with a sequencing error rate over 20%. -####2. Why does a read appear multiple times in the output SAM? +#### 2. Why does a read appear multiple times in the output SAM? BWA-SW and BWA-MEM perform local alignments. If there is a translocation, a gene fusion or a long deletion, a read bridging the break point may have two hits, @@ -116,18 +116,18 @@ occupying two lines in the SAM output. With the default setting of BWA-MEM, one and only one line is primary and is soft clipped; other lines are tagged with 0x800 SAM flag (supplementary alignment) and are hard clipped. -####3. Does BWA work on reference sequences longer than 4GB in total? +#### 3. Does BWA work on reference sequences longer than 4GB in total? Yes. Since 0.6.x, all BWA algorithms work with a genome with total length over 4GB. However, individual chromosome should not be longer than 2GB. -####4. Why can one read in a pair has high mapping quality but the other has zero? +#### 4. Why can one read in a pair have a high mapping quality but the other has zero? This is correct. Mapping quality is assigned for individual read, not for a read pair. It is possible that one read can be mapped unambiguously, but its mate falls in a tandem repeat and thus its accurate position cannot be determined. -####5. How can a BWA-backtrack alignment stands out of the end of a chromosome? +#### 5. How can a BWA-backtrack alignment stand out of the end of a chromosome? Internally BWA concatenates all reference sequences into one long sequence. A read may be mapped to the junction of two adjacent reference sequences. In this @@ -135,7 +135,7 @@ case, BWA-backtrack will flag the read as unmapped (0x4), but you will see position, CIGAR and all the tags. A similar issue may occur to BWA-SW alignment as well. BWA-MEM does not have this problem. -####6. Does BWA work with ALT contigs in the GRCh38 release? +#### 6. Does BWA work with ALT contigs in the GRCh38 release? Yes, since 0.7.11, BWA-MEM officially supports mapping to GRCh38+ALT. BWA-backtrack and BWA-SW don't properly support ALT mapping as of now. Please @@ -143,7 +143,7 @@ see [README-alt.md][18] for details. Briefly, it is recommended to use [bwakit][17], the binary release of BWA, for generating the reference genome and for mapping. -####7. Can I just run BWA-MEM against GRCh38+ALT without post-processing? +#### 7. Can I just run BWA-MEM against GRCh38+ALT without post-processing? If you are not interested in hits to ALT contigs, it is okay to run BWA-MEM without post-processing. The alignments produced this way are very close to From a61b1dc6108b68d412055306bfca0fb133be37f3 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Thu, 29 Jun 2017 15:36:46 +0100 Subject: [PATCH 09/11] Expand tot_seqs variable to long long This counter is only used in a log message. Fixes #131. --- bwape.c | 5 +++-- bwase.c | 5 +++-- bwtaln.c | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/bwape.c b/bwape.c index f2d3d8e..fa4f7f7 100644 --- a/bwape.c +++ b/bwape.c @@ -624,7 +624,8 @@ ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt, const char *rg_line) { extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); - int i, j, n_seqs, tot_seqs = 0; + int i, j, n_seqs; + long long tot_seqs = 0; bwa_seq_t *seqs[2]; bwa_seqio_t *ks[2]; clock_t t; @@ -711,7 +712,7 @@ void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const f for (j = 0; j < 2; ++j) bwa_free_read_seq(n_seqs, seqs[j]); - fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs); + fprintf(stderr, "[bwa_sai2sam_pe_core] %lld sequences have been processed.\n", tot_seqs); last_ii = ii; } diff --git a/bwase.c b/bwase.c index 37e4274..18e8671 100644 --- a/bwase.c +++ b/bwase.c @@ -507,7 +507,8 @@ void bwase_initialize() void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_fa, int n_occ, const char *rg_line) { extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); - int i, n_seqs, tot_seqs = 0, m_aln; + int i, n_seqs, m_aln; + long long tot_seqs = 0; bwt_aln1_t *aln = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; @@ -565,7 +566,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); bwa_free_read_seq(n_seqs, seqs); - fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); + fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs); } // destroy diff --git a/bwtaln.c b/bwtaln.c index 20b01cd..a348fdf 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -158,7 +158,8 @@ bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa) void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) { - int i, n_seqs, tot_seqs = 0; + int i, n_seqs; + long long tot_seqs = 0; bwa_seq_t *seqs; bwa_seqio_t *ks; clock_t t; @@ -218,7 +219,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); bwa_free_read_seq(n_seqs, seqs); - fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs); + fprintf(stderr, "[bwa_aln_core] %lld sequences have been processed.\n", tot_seqs); } // destroy From 690649872b456f17e8f5cf2d52108f8e7eaf9d96 Mon Sep 17 00:00:00 2001 From: John Marshall Date: Fri, 30 Jun 2017 12:46:56 +0100 Subject: [PATCH 10/11] Copy the whole kstring_t even if it contains NULs FASTQ files containing NULs are invalid but should not cause bwa to crash, as it does if the quality line contains a NUL. Fixes #122. --- bwa.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/bwa.c b/bwa.c index 1ca5634..bc9d373 100644 --- a/bwa.c +++ b/bwa.c @@ -30,13 +30,23 @@ static inline void trim_readno(kstring_t *s) s->l -= 2, s->s[s->l] = 0; } +static inline char *dupkstring(const kstring_t *str, int dupempty) +{ + char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL; + if (!s) return NULL; + + memcpy(s, str->s, str->l); + s[str->l] = '\0'; + return s; +} + static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s) { // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice - s->name = strdup(ks->name.s); - s->comment = ks->comment.l? strdup(ks->comment.s) : 0; - s->seq = strdup(ks->seq.s); - s->qual = ks->qual.l? strdup(ks->qual.s) : 0; - s->l_seq = strlen(s->seq); + s->name = dupkstring(&ks->name, 1); + s->comment = dupkstring(&ks->comment, 0); + s->seq = dupkstring(&ks->seq, 1); + s->qual = dupkstring(&ks->qual, 0); + s->l_seq = ks->seq.l; } bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_) From c56c2e4677d64cc301167b7345b88210e83762e0 Mon Sep 17 00:00:00 2001 From: Andreas Tille Date: Sun, 12 Jul 2015 14:59:23 +0200 Subject: [PATCH 11/11] Several trivial Debian patches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Andreas Tille] Check exact number of arguments of bwtupdate Fix spelling [Fabian Klötzl] change printing as size_t is unsigned --- bwtindex.c | 2 +- fastmap.c | 2 +- malloc_wrap.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bwtindex.c b/bwtindex.c index 571d087..e1322f8 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -175,7 +175,7 @@ void bwt_bwtupdate_core(bwt_t *bwt) int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command { bwt_t *bwt; - if (argc < 2) { + if (argc != 2) { fprintf(stderr, "Usage: bwa bwtupdate \n"); return 1; } diff --git a/fastmap.c b/fastmap.c index c2ad90a..0619999 100644 --- a/fastmap.c +++ b/fastmap.c @@ -258,7 +258,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d]\n", opt->e_del, opt->e_ins); fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3); fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n\n", opt->pen_unpaired); - fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n"); + fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overridden [null]\n"); fprintf(stderr, " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref)\n"); fprintf(stderr, " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref)\n"); fprintf(stderr, " intractg: -B9 -O16 -L5 (intra-species contigs to ref)\n"); diff --git a/malloc_wrap.c b/malloc_wrap.c index 100b8cb..6165e04 100644 --- a/malloc_wrap.c +++ b/malloc_wrap.c @@ -13,7 +13,7 @@ void *wrap_calloc(size_t nmemb, size_t size, void *p = calloc(nmemb, size); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, nmemb * size, file, line, strerror(errno)); exit(EXIT_FAILURE); } @@ -25,7 +25,7 @@ void *wrap_malloc(size_t size, void *p = malloc(size); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, size, file, line, strerror(errno)); exit(EXIT_FAILURE); } @@ -37,7 +37,7 @@ void *wrap_realloc(void *ptr, size_t size, void *p = realloc(ptr, size); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, size, file, line, strerror(errno)); exit(EXIT_FAILURE); } @@ -49,7 +49,7 @@ char *wrap_strdup(const char *s, char *p = strdup(s); if (NULL == p) { fprintf(stderr, - "[%s] Failed to allocate %zd bytes at %s line %u: %s\n", + "[%s] Failed to allocate %zu bytes at %s line %u: %s\n", func, strlen(s), file, line, strerror(errno)); exit(EXIT_FAILURE); }