r898: read the index into a single memory block
Prepare for shared memory. Not used now.
This commit is contained in:
parent
c44357926f
commit
c5e859b49f
15
README.md
15
README.md
|
|
@ -71,11 +71,11 @@ algorithm and setting may vary. The following list gives the recommended
|
|||
settings:
|
||||
|
||||
* Illumina/454/IonTorrent single-end reads longer than ~70bp or assembly
|
||||
contigs up to a few megabases mapped to a close related reference genome:
|
||||
contigs up to a few megabases mapped to a closely related reference genome:
|
||||
|
||||
bwa mem ref.fa reads.fq > aln.sam
|
||||
|
||||
* Illumina single-end reads no longer than ~70bp:
|
||||
* Illumina single-end reads shorter than ~70bp:
|
||||
|
||||
bwa aln ref.fa reads.fq > reads.sai; bwa samse ref.fa reads.sai reads.fq > aln-se.sam
|
||||
|
||||
|
|
@ -83,24 +83,21 @@ settings:
|
|||
|
||||
bwa mem ref.fa read1.fq read2.fq > aln-pe.sam
|
||||
|
||||
* Illumina paired-end reads no longer than ~70bp:
|
||||
* Illumina paired-end reads shorter than ~70bp:
|
||||
|
||||
bwa aln ref.fa read1.fq > read1.sai; bwa aln ref.fa read2.fq > read2.sai
|
||||
bwa sampe ref.fa read1.sai read2.sai read1.fq read2.fq > aln-pe.sam
|
||||
|
||||
* PacBio subreads to a reference genome:
|
||||
* PacBio subreads or Oxford Nanopore reads to a reference genome:
|
||||
|
||||
bwa mem -x pacbio ref.fa reads.fq > aln.sam
|
||||
|
||||
* Oxford Nanopore reads to a reference genome:
|
||||
|
||||
bwa mem -x ont2d ref.fa reads.fq > aln.sam
|
||||
|
||||
BWA-MEM is recommended for query sequences longer than ~70bp for a variety of
|
||||
error rates (or sequence divergence). Generally, BWA-MEM is more tolerant with
|
||||
errors given longer query sequences as the chance of missing all seeds is small.
|
||||
As is shown above, with non-default settings, BWA-MEM works with PacBio subreads
|
||||
with a sequencing error rate as high as ~15%.
|
||||
As is shown above, with non-default settings, BWA-MEM works with Oxford Nanopore
|
||||
reads with a sequencing error rate over 20%.
|
||||
|
||||
####<a name="multihit"></a>2. Why does a read appear multiple times in the output SAM?
|
||||
|
||||
|
|
|
|||
74
bwa.c
74
bwa.c
|
|
@ -259,12 +259,80 @@ bwaidx_t *bwa_idx_load(const char *hint, int which)
|
|||
void bwa_idx_destroy(bwaidx_t *idx)
|
||||
{
|
||||
if (idx == 0) return;
|
||||
if (idx->bwt) bwt_destroy(idx->bwt);
|
||||
if (idx->bns) bns_destroy(idx->bns);
|
||||
if (idx->pac) free(idx->pac);
|
||||
if (idx->mem == 0) {
|
||||
if (idx->bwt) bwt_destroy(idx->bwt);
|
||||
if (idx->bns) bns_destroy(idx->bns);
|
||||
if (idx->pac) free(idx->pac);
|
||||
} else free((uint8_t*)idx->mem);
|
||||
free(idx);
|
||||
}
|
||||
|
||||
int bwa_mem2idx(int64_t l_mem, const uint8_t *mem, bwaidx_t *idx)
|
||||
{
|
||||
int64_t k = 0, x;
|
||||
int i;
|
||||
|
||||
// generate idx->bwt
|
||||
x = sizeof(bwt_t); idx->bwt = (bwt_t*)(mem + k); k += x;
|
||||
x = idx->bwt->n_sa * sizeof(bwtint_t); idx->bwt->sa = (bwtint_t*)(mem + k); k += x;
|
||||
x = idx->bwt->bwt_size * 4; idx->bwt->bwt = (uint32_t*)(mem + k); k += x;
|
||||
|
||||
// generate idx->bns and idx->pac
|
||||
x = sizeof(bntseq_t); idx->bns = (bntseq_t*)(mem + k); k += x;
|
||||
x = idx->bns->n_holes * sizeof(bntamb1_t); idx->bns->ambs = (bntamb1_t*)(mem + k); k += x;
|
||||
x = idx->bns->n_seqs * sizeof(bntann1_t); idx->bns->anns = (bntann1_t*)(mem + k); k += x;
|
||||
for (i = 0; i < idx->bns->n_seqs; ++i) {
|
||||
idx->bns->anns[i].name = (char*)(mem + k); k += strlen(idx->bns->anns[i].name) + 1;
|
||||
idx->bns->anns[i].anno = (char*)(mem + k); k += strlen(idx->bns->anns[i].anno) + 1;
|
||||
}
|
||||
idx->pac = (uint8_t*)(mem + k);
|
||||
|
||||
idx->l_mem = k; idx->mem = mem;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bwa_idx2mem(bwaidx_t *idx)
|
||||
{
|
||||
int i;
|
||||
int64_t k = 0, x, tmp;
|
||||
uint8_t *mem = 0;
|
||||
|
||||
// copy idx->bwt
|
||||
mem = malloc(sizeof(bwt_t) + idx->bwt->n_sa * sizeof(bwtint_t));
|
||||
x = sizeof(bwt_t); memcpy(mem + k, idx->bwt, x); k += x;
|
||||
x = idx->bwt->n_sa * sizeof(bwtint_t); memcpy(mem + k, idx->bwt->sa, x); k += x;
|
||||
free(idx->bwt->sa);
|
||||
mem = realloc(mem, k + idx->bwt->bwt_size * 4);
|
||||
x = idx->bwt->bwt_size * 4; memcpy(mem + k, idx->bwt->bwt, x); k += x;
|
||||
free(idx->bwt->bwt);
|
||||
free(idx->bwt); idx->bwt = 0;
|
||||
|
||||
// copy idx->bns
|
||||
tmp = idx->bns->n_seqs * sizeof(bntann1_t) + idx->bns->n_holes * sizeof(bntamb1_t);
|
||||
for (i = 0; i < idx->bns->n_seqs; ++i) // compute the size of heap-allocated memory
|
||||
tmp += strlen(idx->bns->anns[i].name) + strlen(idx->bns->anns[i].anno) + 2;
|
||||
mem = realloc(mem, k + sizeof(bntseq_t) + tmp);
|
||||
x = sizeof(bntseq_t); memcpy(mem + k, idx->bns, x); k += x;
|
||||
x = idx->bns->n_holes * sizeof(bntamb1_t); memcpy(mem + k, idx->bns->ambs, x); k += x;
|
||||
free(idx->bns->ambs);
|
||||
x = idx->bns->n_seqs * sizeof(bntann1_t); memcpy(mem + k, idx->bns->anns, x); k += x;
|
||||
for (i = 0; i < idx->bns->n_seqs; ++i) {
|
||||
x = strlen(idx->bns->anns[i].name) + 1; memcpy(mem + k, idx->bns->anns[i].name, x); k += x;
|
||||
x = strlen(idx->bns->anns[i].anno) + 1; memcpy(mem + k, idx->bns->anns[i].anno, x); k += x;
|
||||
free(idx->bns->anns[i].name); free(idx->bns->anns[i].anno);
|
||||
}
|
||||
free(idx->bns->anns);
|
||||
|
||||
// copy idx->pac
|
||||
x = idx->bns->l_pac/4+1;
|
||||
mem = realloc(mem, k + x);
|
||||
memcpy(mem + k, idx->pac, x); k += x;
|
||||
free(idx->bns); idx->bns = 0;
|
||||
free(idx->pac); idx->pac = 0;
|
||||
|
||||
return bwa_mem2idx(k, mem, idx);
|
||||
}
|
||||
|
||||
/***********************
|
||||
* SAM header routines *
|
||||
***********************/
|
||||
|
|
|
|||
5
bwa.h
5
bwa.h
|
|
@ -14,6 +14,9 @@ typedef struct {
|
|||
bwt_t *bwt; // FM-index
|
||||
bntseq_t *bns; // information on the reference sequences
|
||||
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||||
|
||||
int64_t l_mem;
|
||||
const uint8_t *mem;
|
||||
} bwaidx_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -39,6 +42,8 @@ extern "C" {
|
|||
|
||||
bwaidx_t *bwa_idx_load(const char *hint, int which);
|
||||
void bwa_idx_destroy(bwaidx_t *idx);
|
||||
int bwa_idx2mem(bwaidx_t *idx);
|
||||
int bwa_mem2idx(int64_t l_mem, const uint8_t *mem, bwaidx_t *idx);
|
||||
|
||||
void bwa_print_sam_hdr(const bntseq_t *bns, const char *rg_line);
|
||||
char *bwa_set_rg(const char *s);
|
||||
|
|
|
|||
Loading…
Reference in New Issue