138 lines
4.7 KiB
C++
138 lines
4.7 KiB
C++
#include <cstdio>
|
|
#include <cstring>
|
|
|
|
#include "bwase.h"
|
|
#include "bwa_gateway.h"
|
|
|
|
BWA::BWA(const char* ann_filename,
|
|
const char* amb_filename,
|
|
const char* pac_filename,
|
|
const char* forward_bwt_filename,
|
|
const char* forward_sa_filename,
|
|
const char* reverse_bwt_filename,
|
|
const char* reverse_sa_filename)
|
|
{
|
|
bns = bns_restore_core(ann_filename,amb_filename,pac_filename);
|
|
bwts[0] = bwt_restore_bwt(forward_bwt_filename);
|
|
bwt_restore_sa(forward_sa_filename, bwts[0]);
|
|
bwts[1] = bwt_restore_bwt(reverse_bwt_filename);
|
|
bwt_restore_sa(reverse_sa_filename, bwts[1]);
|
|
load_default_options();
|
|
}
|
|
|
|
BWA::~BWA() {
|
|
bwt_destroy(bwts[0]);
|
|
bwt_destroy(bwts[1]);
|
|
}
|
|
|
|
void BWA::align(const char* bases, const unsigned read_length, Alignment*& alignments, unsigned& num_alignments)
|
|
{
|
|
bwa_seq_t sequence;
|
|
initialize_sequence(sequence);
|
|
copy_bases_into_sequence(sequence, bases, read_length, true);
|
|
|
|
// Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln).
|
|
// This method will destroy the contents of seq and rseq.
|
|
bwa_cal_sa_reg_gap(0,bwts,1,&sequence,&options);
|
|
|
|
// Translate suffix array indices into exactly how many alignments have been found.
|
|
bwa_aln2seq(sequence.n_aln,sequence.aln,&sequence);
|
|
|
|
// Calculate and refine the position for each alignment. This position may be inaccurate
|
|
// if the read contains indels, etc. Refinement requires the original sequences in the proper order.
|
|
copy_bases_into_sequence(sequence, bases, read_length, false);
|
|
create_alignments(sequence, alignments, num_alignments);
|
|
}
|
|
|
|
void BWA::load_default_options()
|
|
{
|
|
options.s_mm = 3;
|
|
options.s_gapo = 11;
|
|
options.s_gape = 4;
|
|
options.mode = 3;
|
|
options.indel_end_skip = 5;
|
|
options.max_del_occ = 10;
|
|
options.max_entries = 2000000;
|
|
options.fnr = 0.04;
|
|
options.max_diff = -1;
|
|
options.max_gapo = 1;
|
|
options.max_gape = 6;
|
|
options.max_seed_diff = 2;
|
|
options.seed_len = 2147483647;
|
|
options.n_threads = 1;
|
|
options.max_top2 = 30;
|
|
options.trim_qual = 0;
|
|
}
|
|
|
|
void BWA::initialize_sequence(bwa_seq_t& sequence)
|
|
{
|
|
sequence.tid = -1;
|
|
sequence.name = 0;
|
|
sequence.qual = 0;
|
|
|
|
sequence.seq = NULL;
|
|
sequence.rseq = NULL;
|
|
|
|
sequence.cigar = 0;
|
|
sequence.n_cigar = NULL;
|
|
}
|
|
|
|
void BWA::copy_bases_into_sequence(bwa_seq_t& sequence, const char* bases, unsigned read_length, bool reverse)
|
|
{
|
|
// seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
|
|
sequence.seq = new ubyte_t[read_length];
|
|
sequence.rseq = new ubyte_t[read_length];
|
|
for(unsigned i = 0; i < read_length; i++) sequence.seq[i] = nst_nt4_table[(unsigned)bases[i]];
|
|
memcpy(sequence.rseq,sequence.seq,read_length);
|
|
if(reverse) {
|
|
seq_reverse(read_length,sequence.seq,0);
|
|
seq_reverse(read_length,sequence.rseq,1);
|
|
}
|
|
sequence.full_len = sequence.len = read_length;
|
|
}
|
|
|
|
void BWA::create_alignments(bwa_seq_t& sequence, Alignment*& alignments, unsigned& num_alignments) {
|
|
num_alignments = 0;
|
|
for(unsigned i = 0; i < (unsigned)sequence.n_aln; i++)
|
|
num_alignments += (sequence.aln + i)->l - (sequence.aln + i)->k + 1;
|
|
|
|
alignments = new Alignment[num_alignments];
|
|
unsigned alignment_idx = 0;
|
|
|
|
// backup existing alignment blocks.
|
|
bwt_aln1_t* alignment_blocks = sequence.aln;
|
|
int num_alignment_blocks = sequence.n_aln;
|
|
|
|
for(unsigned alignment_block_idx = 0; alignment_block_idx < (unsigned)num_alignment_blocks; alignment_block_idx++) {
|
|
// Stub in a 'working' alignment block, so that only the desired alignment is local-aligned.
|
|
const bwt_aln1_t* alignment_block = alignment_blocks + alignment_block_idx;
|
|
bwt_aln1_t working_alignment_block = *alignment_block;
|
|
|
|
// Loop through all alignments, aligning each one individually.
|
|
for(unsigned sa_idx = alignment_block->k; sa_idx <= alignment_block->l; sa_idx++) {
|
|
working_alignment_block.k = working_alignment_block.l = sa_idx;
|
|
sequence.aln = &working_alignment_block;
|
|
sequence.n_aln = 1;
|
|
|
|
// Calculate the local alignment.
|
|
bwa_cal_pac_pos_core(bwts[0],bwts[1],&sequence,options.max_diff,options.fnr);
|
|
bwa_refine_gapped(bns, 1, &sequence, 0, NULL);
|
|
|
|
// Copy the local alignment data into the alignment object.
|
|
Alignment& alignment = *(alignments + alignment_idx);
|
|
alignment.type = sequence.type;
|
|
bns_coor_pac2real(bns, sequence.pos, pos_end(&sequence) - sequence.pos, &alignment.contig);
|
|
alignment.pos = sequence.pos - bns->anns[alignment.contig].offset + 1;
|
|
alignment.negative_strand = sequence.strand;
|
|
alignment.cigar = sequence.cigar;
|
|
alignment.n_cigar = sequence.n_cigar;
|
|
alignment.mapQ = sequence.mapQ;
|
|
|
|
alignment_idx++;
|
|
}
|
|
}
|
|
|
|
sequence.aln = alignment_blocks;
|
|
sequence.n_aln = num_alignment_blocks;
|
|
}
|