2009-10-29 05:37:49 +08:00
# include <cstdio>
# include <cstring>
# include "bwase.h"
# include "bwa_gateway.h"
BWA : : BWA ( const char * ann_filename ,
const char * amb_filename ,
const char * pac_filename ,
const char * forward_bwt_filename ,
const char * forward_sa_filename ,
const char * reverse_bwt_filename ,
const char * reverse_sa_filename )
{
2009-11-03 08:01:55 +08:00
// Load the bns (?) and reference
2009-10-29 05:37:49 +08:00
bns = bns_restore_core ( ann_filename , amb_filename , pac_filename ) ;
2009-11-03 08:01:55 +08:00
reference = new ubyte_t [ bns - > l_pac / 4 + 1 ] ;
rewind ( bns - > fp_pac ) ;
fread ( reference , 1 , bns - > l_pac / 4 + 1 , bns - > fp_pac ) ;
// Load the BWTs (both directions) and suffix arrays (both directions)
2009-10-29 05:37:49 +08:00
bwts [ 0 ] = bwt_restore_bwt ( forward_bwt_filename ) ;
bwt_restore_sa ( forward_sa_filename , bwts [ 0 ] ) ;
bwts [ 1 ] = bwt_restore_bwt ( reverse_bwt_filename ) ;
bwt_restore_sa ( reverse_sa_filename , bwts [ 1 ] ) ;
load_default_options ( ) ;
2009-11-04 22:33:23 +08:00
// initialize the bwase subsystem
bwase_initialize ( ) ;
2009-10-29 05:37:49 +08:00
}
BWA : : ~ BWA ( ) {
2009-11-03 08:01:55 +08:00
delete [ ] reference ;
bns_destroy ( bns ) ;
2009-10-29 05:37:49 +08:00
bwt_destroy ( bwts [ 0 ] ) ;
bwt_destroy ( bwts [ 1 ] ) ;
}
void BWA : : align ( const char * bases , const unsigned read_length , Alignment * & alignments , unsigned & num_alignments )
{
2009-10-30 00:32:43 +08:00
bwa_seq_t * sequence = create_sequence ( ) ;
2009-10-30 05:33:13 +08:00
copy_bases_into_sequence ( sequence , bases , read_length ) ;
2009-10-29 05:37:49 +08:00
// Calculate the suffix array interval for each sequence, storing the result in sequence.aln (and sequence.n_aln).
// This method will destroy the contents of seq and rseq.
2009-10-30 00:32:43 +08:00
bwa_cal_sa_reg_gap ( 0 , bwts , 1 , sequence , & options ) ;
2009-10-29 05:37:49 +08:00
// Translate suffix array indices into exactly how many alignments have been found.
2009-10-30 00:32:43 +08:00
bwa_aln2seq ( sequence - > n_aln , sequence - > aln , sequence ) ;
2009-10-29 05:37:49 +08:00
// Calculate and refine the position for each alignment. This position may be inaccurate
// if the read contains indels, etc. Refinement requires the original sequences in the proper order.
2009-10-30 05:33:13 +08:00
copy_bases_into_sequence ( sequence , bases , read_length ) ;
2009-10-29 05:37:49 +08:00
create_alignments ( sequence , alignments , num_alignments ) ;
2009-10-30 00:32:43 +08:00
bwa_free_read_seq ( 1 , sequence ) ;
2009-10-29 05:37:49 +08:00
}
void BWA : : load_default_options ( )
{
options . s_mm = 3 ;
options . s_gapo = 11 ;
options . s_gape = 4 ;
options . mode = 3 ;
options . indel_end_skip = 5 ;
options . max_del_occ = 10 ;
options . max_entries = 2000000 ;
options . fnr = 0.04 ;
options . max_diff = - 1 ;
options . max_gapo = 1 ;
options . max_gape = 6 ;
options . max_seed_diff = 2 ;
options . seed_len = 2147483647 ;
options . n_threads = 1 ;
options . max_top2 = 30 ;
options . trim_qual = 0 ;
}
2009-11-12 04:54:49 +08:00
void BWA : : set_max_edit_distance ( int edit_distance ) { options . max_diff = edit_distance ; }
void BWA : : set_max_gap_opens ( int max_gap_opens ) { options . max_gapo = max_gap_opens ; }
void BWA : : set_max_gap_extensions ( int max_gap_extensions ) { options . max_gape = max_gap_extensions ; }
void BWA : : set_disallow_indel_within_range ( int indel_range ) { options . indel_end_skip = indel_range ; printf ( " set indel end skip to %d \n " , options . indel_end_skip ) ; }
void BWA : : set_mismatch_penalty ( int penalty ) { options . s_mm = penalty ; }
void BWA : : set_gap_open_penalty ( int penalty ) { options . s_gapo = penalty ; }
void BWA : : set_gap_extension_penalty ( int penalty ) { options . s_gape = penalty ; }
2009-10-30 00:32:43 +08:00
/**
* Create a sequence with a set of reasonable initial defaults .
* Will leave seq and rseq empty .
*/
bwa_seq_t * BWA : : create_sequence ( )
2009-10-29 05:37:49 +08:00
{
2009-10-30 00:32:43 +08:00
bwa_seq_t * sequence = new bwa_seq_t ;
sequence - > tid = - 1 ;
sequence - > name = 0 ;
sequence - > seq = NULL ;
sequence - > rseq = NULL ;
sequence - > qual = 0 ;
sequence - > aln = 0 ;
sequence - > md = 0 ;
2009-10-29 05:37:49 +08:00
2009-10-30 00:32:43 +08:00
sequence - > cigar = NULL ;
sequence - > n_cigar = 0 ;
2009-10-29 05:37:49 +08:00
2009-10-30 00:32:43 +08:00
return sequence ;
2009-10-29 05:37:49 +08:00
}
2009-10-30 05:33:13 +08:00
void BWA : : copy_bases_into_sequence ( bwa_seq_t * sequence , const char * bases , const unsigned read_length )
2009-10-29 05:37:49 +08:00
{
// seq, rseq will ultimately be freed by bwa_cal_sa_reg_gap
2009-10-30 00:32:43 +08:00
sequence - > seq = new ubyte_t [ read_length ] ;
sequence - > rseq = new ubyte_t [ read_length ] ;
for ( unsigned i = 0 ; i < read_length ; i + + ) sequence - > seq [ i ] = nst_nt4_table [ ( unsigned ) bases [ i ] ] ;
memcpy ( sequence - > rseq , sequence - > seq , read_length ) ;
2009-10-30 05:33:13 +08:00
// BWA expects the read bases to arrive reversed.
seq_reverse ( read_length , sequence - > seq , 0 ) ;
seq_reverse ( read_length , sequence - > rseq , 1 ) ;
2009-10-30 00:32:43 +08:00
sequence - > full_len = sequence - > len = read_length ;
2009-10-29 05:37:49 +08:00
}
2009-10-30 00:32:43 +08:00
void BWA : : create_alignments ( bwa_seq_t * sequence , Alignment * & alignments , unsigned & num_alignments ) {
2009-10-29 05:37:49 +08:00
num_alignments = 0 ;
2009-10-30 00:32:43 +08:00
for ( unsigned i = 0 ; i < ( unsigned ) sequence - > n_aln ; i + + )
num_alignments + = ( sequence - > aln + i ) - > l - ( sequence - > aln + i ) - > k + 1 ;
2009-10-29 05:37:49 +08:00
alignments = new Alignment [ num_alignments ] ;
unsigned alignment_idx = 0 ;
// backup existing alignment blocks.
2009-10-30 00:32:43 +08:00
bwt_aln1_t * alignment_blocks = sequence - > aln ;
int num_alignment_blocks = sequence - > n_aln ;
2009-10-29 05:37:49 +08:00
for ( unsigned alignment_block_idx = 0 ; alignment_block_idx < ( unsigned ) num_alignment_blocks ; alignment_block_idx + + ) {
// Stub in a 'working' alignment block, so that only the desired alignment is local-aligned.
const bwt_aln1_t * alignment_block = alignment_blocks + alignment_block_idx ;
bwt_aln1_t working_alignment_block = * alignment_block ;
// Loop through all alignments, aligning each one individually.
for ( unsigned sa_idx = alignment_block - > k ; sa_idx < = alignment_block - > l ; sa_idx + + ) {
working_alignment_block . k = working_alignment_block . l = sa_idx ;
2009-10-30 00:32:43 +08:00
sequence - > aln = & working_alignment_block ;
sequence - > n_aln = 1 ;
2009-10-29 05:37:49 +08:00
2009-11-02 12:02:09 +08:00
sequence - > sa = sa_idx ;
sequence - > strand = alignment_block - > a ;
sequence - > score = alignment_block - > score ;
// Each time through bwa_refine_gapped, seq gets reversed. Revert the reverse.
// TODO: Fix the interface to bwa_refine_gapped so its easier to work with.
if ( alignment_idx > 0 )
seq_reverse ( sequence - > len , sequence - > seq , 0 ) ;
2009-11-03 08:01:55 +08:00
// Calculate the local coordinate and local alignment.
2009-10-30 00:32:43 +08:00
bwa_cal_pac_pos_core ( bwts [ 0 ] , bwts [ 1 ] , sequence , options . max_diff , options . fnr ) ;
2009-11-03 08:01:55 +08:00
bwa_refine_gapped ( bns , 1 , sequence , reference , NULL ) ;
2009-10-29 05:37:49 +08:00
// Copy the local alignment data into the alignment object.
Alignment & alignment = * ( alignments + alignment_idx ) ;
2009-10-30 00:32:43 +08:00
alignment . type = sequence - > type ;
bns_coor_pac2real ( bns , sequence - > pos , pos_end ( sequence ) - sequence - > pos , & alignment . contig ) ;
alignment . pos = sequence - > pos - bns - > anns [ alignment . contig ] . offset + 1 ;
alignment . negative_strand = sequence - > strand ;
alignment . mapQ = sequence - > mapQ ;
2009-10-29 05:37:49 +08:00
2009-10-30 05:33:13 +08:00
alignment . cigar = NULL ;
if ( sequence - > cigar ) {
alignment . cigar = new uint16_t [ sequence - > n_cigar ] ;
memcpy ( alignment . cigar , sequence - > cigar , sequence - > n_cigar * sizeof ( uint16_t ) ) ;
}
alignment . n_cigar = sequence - > n_cigar ;
2009-10-29 05:37:49 +08:00
alignment_idx + + ;
}
}
2009-10-30 00:32:43 +08:00
// Restore original alignment blocks.
sequence - > aln = alignment_blocks ;
sequence - > n_aln = num_alignment_blocks ;
2009-10-29 05:37:49 +08:00
}