2011-10-25 12:22:28 +08:00
# include <zlib.h>
# include <stdio.h>
2011-10-26 03:06:13 +08:00
# include <unistd.h>
# include <stdlib.h>
2014-03-31 23:21:03 +08:00
# include <string.h>
2014-08-25 23:59:27 +08:00
# include <limits.h>
2013-08-29 03:59:05 +08:00
# include <ctype.h>
2013-09-07 00:31:47 +08:00
# include <math.h>
2013-02-24 04:55:55 +08:00
# include "bwa.h"
2013-02-01 02:59:48 +08:00
# include "bwamem.h"
2011-10-25 12:22:28 +08:00
# include "kvec.h"
2013-02-12 23:36:15 +08:00
# include "utils.h"
2014-09-28 11:21:50 +08:00
# include "bntseq.h"
2011-10-25 12:22:28 +08:00
# include "kseq.h"
2013-02-07 03:38:40 +08:00
KSEQ_DECLARE ( gzFile )
2011-10-25 12:22:28 +08:00
extern unsigned char nst_nt4_table [ 256 ] ;
2013-02-24 06:09:23 +08:00
void * kopen ( const char * fn , int * _fd ) ;
int kclose ( void * a ) ;
2014-11-27 01:32:48 +08:00
void kt_pipeline ( int n_threads , void * ( * func ) ( void * , int , void * ) , void * shared_data , int n_steps ) ;
typedef struct {
kseq_t * ks , * ks2 ;
mem_opt_t * opt ;
mem_pestat_t * pes0 ;
int64_t n_processed ;
int copy_comment , actual_chunk_size ;
bwaidx_t * idx ;
} ktp_aux_t ;
typedef struct {
ktp_aux_t * aux ;
int n_seqs ;
bseq1_t * seqs ;
} ktp_data_t ;
static void * process ( void * shared , int step , void * _data )
{
ktp_aux_t * aux = ( ktp_aux_t * ) shared ;
ktp_data_t * data = ( ktp_data_t * ) _data ;
int i ;
if ( step = = 0 ) {
ktp_data_t * ret ;
int64_t size = 0 ;
ret = calloc ( 1 , sizeof ( ktp_data_t ) ) ;
ret - > seqs = bseq_read ( aux - > actual_chunk_size , & ret - > n_seqs , aux - > ks , aux - > ks2 ) ;
if ( ret - > seqs = = 0 ) {
free ( ret ) ;
return 0 ;
}
if ( ! aux - > copy_comment )
for ( i = 0 ; i < ret - > n_seqs ; + + i ) {
free ( ret - > seqs [ i ] . comment ) ;
ret - > seqs [ i ] . comment = 0 ;
}
for ( i = 0 ; i < ret - > n_seqs ; + + i ) size + = ret - > seqs [ i ] . l_seq ;
if ( bwa_verbose > = 3 )
fprintf ( stderr , " [M::%s] read %d sequences (%ld bp)... \n " , __func__ , ret - > n_seqs , ( long ) size ) ;
return ret ;
} else if ( step = = 1 ) {
const mem_opt_t * opt = aux - > opt ;
const bwaidx_t * idx = aux - > idx ;
if ( opt - > flag & MEM_F_SMARTPE ) {
bseq1_t * sep [ 2 ] ;
int n_sep [ 2 ] ;
mem_opt_t tmp_opt = * opt ;
bseq_classify ( data - > n_seqs , data - > seqs , n_sep , sep ) ;
if ( bwa_verbose > = 3 )
fprintf ( stderr , " [M::%s] %d single-end sequences; %d paired-end sequences \n " , __func__ , n_sep [ 0 ] , n_sep [ 1 ] ) ;
if ( n_sep [ 0 ] ) {
tmp_opt . flag & = ~ MEM_F_PE ;
mem_process_seqs ( & tmp_opt , idx - > bwt , idx - > bns , idx - > pac , aux - > n_processed , n_sep [ 0 ] , sep [ 0 ] , 0 ) ;
for ( i = 0 ; i < n_sep [ 0 ] ; + + i )
data - > seqs [ sep [ 0 ] [ i ] . id ] . sam = sep [ 0 ] [ i ] . sam ;
}
if ( n_sep [ 1 ] ) {
tmp_opt . flag | = MEM_F_PE ;
mem_process_seqs ( & tmp_opt , idx - > bwt , idx - > bns , idx - > pac , aux - > n_processed + n_sep [ 0 ] , n_sep [ 1 ] , sep [ 1 ] , aux - > pes0 ) ;
for ( i = 0 ; i < n_sep [ 1 ] ; + + i )
data - > seqs [ sep [ 1 ] [ i ] . id ] . sam = sep [ 1 ] [ i ] . sam ;
}
free ( sep [ 0 ] ) ; free ( sep [ 1 ] ) ;
} else mem_process_seqs ( opt , idx - > bwt , idx - > bns , idx - > pac , aux - > n_processed , data - > n_seqs , data - > seqs , aux - > pes0 ) ;
aux - > n_processed + = data - > n_seqs ;
return data ;
} else if ( step = = 2 ) {
for ( i = 0 ; i < data - > n_seqs ; + + i ) {
if ( data - > seqs [ i ] . sam ) err_fputs ( data - > seqs [ i ] . sam , stdout ) ;
free ( data - > seqs [ i ] . name ) ; free ( data - > seqs [ i ] . comment ) ;
free ( data - > seqs [ i ] . seq ) ; free ( data - > seqs [ i ] . qual ) ; free ( data - > seqs [ i ] . sam ) ;
}
free ( data - > seqs ) ; free ( data ) ;
return 0 ;
}
return 0 ;
}
2011-11-25 08:15:14 +08:00
2014-04-07 23:29:36 +08:00
static void update_a ( mem_opt_t * opt , const mem_opt_t * opt0 )
{
if ( opt0 - > a ) { // matching score is changed
if ( ! opt0 - > b ) opt - > b * = opt - > a ;
if ( ! opt0 - > T ) opt - > T * = opt - > a ;
if ( ! opt0 - > o_del ) opt - > o_del * = opt - > a ;
if ( ! opt0 - > e_del ) opt - > e_del * = opt - > a ;
if ( ! opt0 - > o_ins ) opt - > o_ins * = opt - > a ;
if ( ! opt0 - > e_ins ) opt - > e_ins * = opt - > a ;
if ( ! opt0 - > zdrop ) opt - > zdrop * = opt - > a ;
if ( ! opt0 - > pen_clip5 ) opt - > pen_clip5 * = opt - > a ;
if ( ! opt0 - > pen_clip3 ) opt - > pen_clip3 * = opt - > a ;
if ( ! opt0 - > pen_unpaired ) opt - > pen_unpaired * = opt - > a ;
}
}
2013-02-01 02:59:48 +08:00
int main_mem ( int argc , char * argv [ ] )
2011-11-25 08:15:14 +08:00
{
2014-03-31 23:52:52 +08:00
mem_opt_t * opt , opt0 ;
2014-11-27 01:32:48 +08:00
int fd , fd2 , i , c , ignore_alt = 0 , no_mt_io = 0 ;
int fixed_chunk_size = - 1 ;
2013-02-08 02:13:43 +08:00
gzFile fp , fp2 = 0 ;
2014-11-19 23:59:05 +08:00
char * p , * rg_line = 0 , * hdr_line = 0 ;
2014-04-07 23:29:36 +08:00
const char * mode = 0 ;
2013-02-24 06:09:23 +08:00
void * ko = 0 , * ko2 = 0 ;
2014-11-27 01:32:48 +08:00
mem_pestat_t pes [ 4 ] ;
ktp_aux_t aux ;
2014-03-31 23:21:03 +08:00
2014-11-27 01:32:48 +08:00
memset ( & aux , 0 , sizeof ( ktp_aux_t ) ) ;
2014-03-31 23:21:03 +08:00
memset ( pes , 0 , 4 * sizeof ( mem_pestat_t ) ) ;
for ( i = 0 ; i < 4 ; + + i ) pes [ i ] . failed = 1 ;
2011-11-25 08:15:14 +08:00
2014-11-27 01:32:48 +08:00
aux . opt = opt = mem_opt_init ( ) ;
2014-04-07 23:29:36 +08:00
memset ( & opt0 , 0 , sizeof ( mem_opt_t ) ) ;
2018-04-02 22:47:45 +08:00
while ( ( c = getopt ( argc , argv , " 51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H: " ) ) > = 0 ) {
2014-04-07 23:29:36 +08:00
if ( c = = ' k ' ) opt - > min_seed_len = atoi ( optarg ) , opt0 . min_seed_len = 1 ;
2014-11-27 01:32:48 +08:00
else if ( c = = ' 1 ' ) no_mt_io = 1 ;
2014-04-07 23:29:36 +08:00
else if ( c = = ' x ' ) mode = optarg ;
else if ( c = = ' w ' ) opt - > w = atoi ( optarg ) , opt0 . w = 1 ;
2014-03-31 23:52:52 +08:00
else if ( c = = ' A ' ) opt - > a = atoi ( optarg ) , opt0 . a = 1 ;
else if ( c = = ' B ' ) opt - > b = atoi ( optarg ) , opt0 . b = 1 ;
else if ( c = = ' T ' ) opt - > T = atoi ( optarg ) , opt0 . T = 1 ;
else if ( c = = ' U ' ) opt - > pen_unpaired = atoi ( optarg ) , opt0 . pen_unpaired = 1 ;
2013-02-24 03:48:54 +08:00
else if ( c = = ' t ' ) opt - > n_threads = atoi ( optarg ) , opt - > n_threads = opt - > n_threads > 1 ? opt - > n_threads : 1 ;
2013-02-19 05:33:06 +08:00
else if ( c = = ' P ' ) opt - > flag | = MEM_F_NOPAIRING ;
2013-02-25 02:23:43 +08:00
else if ( c = = ' a ' ) opt - > flag | = MEM_F_ALL ;
2014-11-19 03:30:22 +08:00
else if ( c = = ' p ' ) opt - > flag | = MEM_F_PE | MEM_F_SMARTPE ;
2013-02-26 00:18:35 +08:00
else if ( c = = ' M ' ) opt - > flag | = MEM_F_NO_MULTI ;
2013-04-10 04:13:55 +08:00
else if ( c = = ' S ' ) opt - > flag | = MEM_F_NO_RESCUE ;
2014-05-01 01:12:43 +08:00
else if ( c = = ' Y ' ) opt - > flag | = MEM_F_SOFTCLIP ;
2014-08-29 22:51:23 +08:00
else if ( c = = ' V ' ) opt - > flag | = MEM_F_REF_HDR ;
2017-09-26 21:35:19 +08:00
else if ( c = = ' 5 ' ) opt - > flag | = MEM_F_PRIMARY5 | MEM_F_KEEP_SUPP_MAPQ ; // always apply MEM_F_KEEP_SUPP_MAPQ with -5
else if ( c = = ' q ' ) opt - > flag | = MEM_F_KEEP_SUPP_MAPQ ;
2018-04-02 22:43:41 +08:00
else if ( c = = ' u ' ) opt - > flag | = MEM_F_XB ;
2014-04-07 23:29:36 +08:00
else if ( c = = ' c ' ) opt - > max_occ = atoi ( optarg ) , opt0 . max_occ = 1 ;
2014-03-31 23:52:52 +08:00
else if ( c = = ' d ' ) opt - > zdrop = atoi ( optarg ) , opt0 . zdrop = 1 ;
2013-02-24 05:57:34 +08:00
else if ( c = = ' v ' ) bwa_verbose = atoi ( optarg ) ;
2014-09-28 11:21:50 +08:00
else if ( c = = ' j ' ) ignore_alt = 1 ;
2014-04-07 23:29:36 +08:00
else if ( c = = ' r ' ) opt - > split_factor = atof ( optarg ) , opt0 . split_factor = 1. ;
2014-04-09 09:45:49 +08:00
else if ( c = = ' D ' ) opt - > drop_ratio = atof ( optarg ) , opt0 . drop_ratio = 1. ;
2014-04-07 23:29:36 +08:00
else if ( c = = ' m ' ) opt - > max_matesw = atoi ( optarg ) , opt0 . max_matesw = 1 ;
else if ( c = = ' s ' ) opt - > split_width = atoi ( optarg ) , opt0 . split_width = 1 ;
2014-04-09 09:45:49 +08:00
else if ( c = = ' G ' ) opt - > max_chain_gap = atoi ( optarg ) , opt0 . max_chain_gap = 1 ;
else if ( c = = ' N ' ) opt - > max_chain_extend = atoi ( optarg ) , opt0 . max_chain_extend = 1 ;
2017-06-28 00:05:44 +08:00
else if ( c = = ' o ' | | c = = ' f ' ) xreopen ( optarg , " wb " , stdout ) ;
2014-04-07 23:29:36 +08:00
else if ( c = = ' W ' ) opt - > min_chain_weight = atoi ( optarg ) , opt0 . min_chain_weight = 1 ;
2014-08-25 23:59:27 +08:00
else if ( c = = ' y ' ) opt - > max_mem_intv = atol ( optarg ) , opt0 . max_mem_intv = 1 ;
2014-11-27 01:32:48 +08:00
else if ( c = = ' C ' ) aux . copy_comment = 1 ;
2014-09-16 11:42:24 +08:00
else if ( c = = ' K ' ) fixed_chunk_size = atoi ( optarg ) ;
2014-10-30 23:33:43 +08:00
else if ( c = = ' X ' ) opt - > mask_level = atof ( optarg ) ;
2014-09-20 04:50:21 +08:00
else if ( c = = ' h ' ) {
opt0 . max_XA_hits = opt0 . max_XA_hits_alt = 1 ;
opt - > max_XA_hits = opt - > max_XA_hits_alt = strtol ( optarg , & p , 10 ) ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
opt - > max_XA_hits_alt = strtol ( p + 1 , & p , 10 ) ;
}
2013-09-07 00:31:47 +08:00
else if ( c = = ' Q ' ) {
2014-04-07 23:29:36 +08:00
opt0 . mapQ_coef_len = 1 ;
2013-09-07 00:31:47 +08:00
opt - > mapQ_coef_len = atoi ( optarg ) ;
opt - > mapQ_coef_fac = opt - > mapQ_coef_len > 0 ? log ( opt - > mapQ_coef_len ) : 0 ;
2014-03-29 02:54:06 +08:00
} else if ( c = = ' O ' ) {
2014-03-31 23:52:52 +08:00
opt0 . o_del = opt0 . o_ins = 1 ;
2014-03-29 02:54:06 +08:00
opt - > o_del = opt - > o_ins = strtol ( optarg , & p , 10 ) ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
opt - > o_ins = strtol ( p + 1 , & p , 10 ) ;
} else if ( c = = ' E ' ) {
2014-03-31 23:52:52 +08:00
opt0 . e_del = opt0 . e_ins = 1 ;
2014-03-29 02:54:06 +08:00
opt - > e_del = opt - > e_ins = strtol ( optarg , & p , 10 ) ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
opt - > e_ins = strtol ( p + 1 , & p , 10 ) ;
2013-09-07 00:31:47 +08:00
} else if ( c = = ' L ' ) {
2014-03-31 23:52:52 +08:00
opt0 . pen_clip5 = opt0 . pen_clip3 = 1 ;
2013-08-29 03:59:05 +08:00
opt - > pen_clip5 = opt - > pen_clip3 = strtol ( optarg , & p , 10 ) ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
opt - > pen_clip3 = strtol ( p + 1 , & p , 10 ) ;
} else if ( c = = ' R ' ) {
2013-02-24 05:41:44 +08:00
if ( ( rg_line = bwa_set_rg ( optarg ) ) = = 0 ) return 1 ; // FIXME: memory leak
2014-11-19 23:59:05 +08:00
} else if ( c = = ' H ' ) {
2014-12-11 23:38:36 +08:00
if ( optarg [ 0 ] ! = ' @ ' ) {
FILE * fp ;
if ( ( fp = fopen ( optarg , " r " ) ) ! = 0 ) {
char * buf ;
buf = calloc ( 1 , 0x10000 ) ;
while ( fgets ( buf , 0xffff , fp ) ) {
i = strlen ( buf ) ;
assert ( buf [ i - 1 ] = = ' \n ' ) ; // a long line
buf [ i - 1 ] = 0 ;
hdr_line = bwa_insert_header ( buf , hdr_line ) ;
}
free ( buf ) ;
fclose ( fp ) ;
}
} else hdr_line = bwa_insert_header ( optarg , hdr_line ) ;
2014-03-31 23:21:03 +08:00
} else if ( c = = ' I ' ) { // specify the insert size distribution
2014-11-27 01:32:48 +08:00
aux . pes0 = pes ;
2014-03-31 23:21:03 +08:00
pes [ 1 ] . failed = 0 ;
pes [ 1 ] . avg = strtod ( optarg , & p ) ;
pes [ 1 ] . std = pes [ 1 ] . avg * .1 ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
pes [ 1 ] . std = strtod ( p + 1 , & p ) ;
pes [ 1 ] . high = ( int ) ( pes [ 1 ] . avg + 4. * pes [ 1 ] . std + .499 ) ;
pes [ 1 ] . low = ( int ) ( pes [ 1 ] . avg - 4. * pes [ 1 ] . std + .499 ) ;
if ( pes [ 1 ] . low < 1 ) pes [ 1 ] . low = 1 ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
pes [ 1 ] . high = ( int ) ( strtod ( p + 1 , & p ) + .499 ) ;
if ( * p ! = 0 & & ispunct ( * p ) & & isdigit ( p [ 1 ] ) )
pes [ 1 ] . low = ( int ) ( strtod ( p + 1 , & p ) + .499 ) ;
2014-03-31 23:52:52 +08:00
if ( bwa_verbose > = 3 )
fprintf ( stderr , " [M::%s] mean insert size: %.3f, stddev: %.3f, max: %d, min: %d \n " ,
__func__ , pes [ 1 ] . avg , pes [ 1 ] . std , pes [ 1 ] . high , pes [ 1 ] . low ) ;
2014-03-31 23:21:03 +08:00
}
2013-04-29 20:58:28 +08:00
else return 1 ;
2013-02-01 02:59:48 +08:00
}
2014-11-19 23:59:05 +08:00
if ( rg_line ) {
hdr_line = bwa_insert_header ( rg_line , hdr_line ) ;
free ( rg_line ) ;
}
2013-02-27 02:36:01 +08:00
if ( opt - > n_threads < 1 ) opt - > n_threads = 1 ;
2013-06-14 21:00:24 +08:00
if ( optind + 1 > = argc | | optind + 3 < argc ) {
2013-02-01 02:59:48 +08:00
fprintf ( stderr , " \n " ) ;
2013-02-26 00:18:35 +08:00
fprintf ( stderr , " Usage: bwa mem [options] <idxbase> <in1.fq> [in2.fq] \n \n " ) ;
fprintf ( stderr , " Algorithm options: \n \n " ) ;
2014-03-29 02:54:06 +08:00
fprintf ( stderr , " -t INT number of threads [%d] \n " , opt - > n_threads ) ;
fprintf ( stderr , " -k INT minimum seed length [%d] \n " , opt - > min_seed_len ) ;
fprintf ( stderr , " -w INT band width for banded alignment [%d] \n " , opt - > w ) ;
fprintf ( stderr , " -d INT off-diagonal X-dropoff [%d] \n " , opt - > zdrop ) ;
fprintf ( stderr , " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g] \n " , opt - > split_factor ) ;
2014-10-21 05:34:15 +08:00
fprintf ( stderr , " -y INT seed occurrence for the 3rd round seeding [%ld] \n " , ( long ) opt - > max_mem_intv ) ;
2014-03-29 02:54:06 +08:00
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
fprintf ( stderr , " -c INT skip seeds with more than INT occurrences [%d] \n " , opt - > max_occ ) ;
2014-04-09 09:45:49 +08:00
fprintf ( stderr , " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f] \n " , opt - > drop_ratio ) ;
2014-04-07 23:29:36 +08:00
fprintf ( stderr , " -W INT discard a chain if seeded bases shorter than INT [0] \n " ) ;
2014-03-29 02:54:06 +08:00
fprintf ( stderr , " -m INT perform at most INT rounds of mate rescues for each read [%d] \n " , opt - > max_matesw ) ;
fprintf ( stderr , " -S skip mate rescue \n " ) ;
fprintf ( stderr , " -P skip pairing; mate rescue performed unless -S also in use \n " ) ;
2014-08-25 23:59:27 +08:00
fprintf ( stderr , " \n Scoring options: \n \n " ) ;
2014-04-07 23:29:36 +08:00
fprintf ( stderr , " -A INT score for a sequence match, which scales options -TdBOELU unless overridden [%d] \n " , opt - > a ) ;
2014-03-29 02:54:06 +08:00
fprintf ( stderr , " -B INT penalty for a mismatch [%d] \n " , opt - > b ) ;
fprintf ( stderr , " -O INT[,INT] gap open penalties for deletions and insertions [%d,%d] \n " , opt - > o_del , opt - > o_ins ) ;
fprintf ( stderr , " -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d] \n " , opt - > e_del , opt - > e_ins ) ;
fprintf ( stderr , " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d] \n " , opt - > pen_clip5 , opt - > pen_clip3 ) ;
2014-08-25 23:59:27 +08:00
fprintf ( stderr , " -U INT penalty for an unpaired read pair [%d] \n \n " , opt - > pen_unpaired ) ;
2015-07-12 20:59:23 +08:00
fprintf ( stderr , " -x STR read type. Setting -x changes multiple parameters unless overridden [null] \n " ) ;
2014-09-16 22:53:07 +08:00
fprintf ( stderr , " pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) \n " ) ;
fprintf ( stderr , " ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) \n " ) ;
fprintf ( stderr , " intractg: -B9 -O16 -L5 (intra-species contigs to ref) \n " ) ;
2013-02-26 00:18:35 +08:00
fprintf ( stderr , " \n Input/output options: \n \n " ) ;
2014-11-19 03:30:22 +08:00
fprintf ( stderr , " -p smart pairing (ignoring in2.fq) \n " ) ;
2014-03-29 02:54:06 +08:00
fprintf ( stderr , " -R STR read group header line such as '@RG \\ tID:foo \\ tSM:bar' [null] \n " ) ;
2014-12-11 23:38:36 +08:00
fprintf ( stderr , " -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null] \n " ) ;
2017-06-28 00:05:44 +08:00
fprintf ( stderr , " -o FILE sam file to output results to [stdout] \n " ) ;
2014-12-22 05:30:29 +08:00
fprintf ( stderr , " -j treat ALT contigs as part of the primary assembly (i.e. ignore <idxbase>.alt file) \n " ) ;
2017-09-26 21:35:19 +08:00
fprintf ( stderr , " -5 for split alignment, take the alignment with the smallest coordinate as primary \n " ) ;
fprintf ( stderr , " -q don't modify mapQ of supplementary alignments \n " ) ;
2017-07-31 10:35:38 +08:00
fprintf ( stderr , " -K INT process INT input bases in each batch regardless of nThreads (for reproducibility) [] \n " ) ;
2013-02-26 00:18:35 +08:00
fprintf ( stderr , " \n " ) ;
2017-07-31 10:35:38 +08:00
fprintf ( stderr , " -v INT verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [%d] \n " , bwa_verbose ) ;
2014-03-29 02:54:06 +08:00
fprintf ( stderr , " -T INT minimum score to output [%d] \n " , opt - > T ) ;
2014-09-20 04:50:21 +08:00
fprintf ( stderr , " -h INT[,INT] if there are <INT hits with score >80%% of the max score, output all in XA [%d,%d] \n " , opt - > max_XA_hits , opt - > max_XA_hits_alt ) ;
2014-03-29 02:54:06 +08:00
fprintf ( stderr , " -a output all alignments for SE or unpaired PE \n " ) ;
fprintf ( stderr , " -C append FASTA/FASTQ comment to SAM output \n " ) ;
2014-08-29 22:51:23 +08:00
fprintf ( stderr , " -V output the reference FASTA header in the XR tag \n " ) ;
2014-05-12 03:15:44 +08:00
fprintf ( stderr , " -Y use soft clipping for supplementary alignments \n " ) ;
2014-03-31 23:21:03 +08:00
fprintf ( stderr , " -M mark shorter split hits as secondary \n \n " ) ;
fprintf ( stderr , " -I FLOAT[,FLOAT[,INT[,INT]]] \n " ) ;
2014-04-01 03:27:23 +08:00
fprintf ( stderr , " specify the mean, standard deviation (10%% of the mean if absent), max \n " ) ;
fprintf ( stderr , " (4 sigma from the mean if absent) and min of the insert size distribution. \n " ) ;
2014-04-01 00:03:27 +08:00
fprintf ( stderr , " FR orientation only. [inferred] \n " ) ;
2014-04-05 04:05:41 +08:00
fprintf ( stderr , " \n " ) ;
fprintf ( stderr , " Note: Please read the man page for detailed description of the command line and options. \n " ) ;
2013-02-01 02:59:48 +08:00
fprintf ( stderr , " \n " ) ;
free ( opt ) ;
return 1 ;
}
2011-11-25 08:15:14 +08:00
2014-04-07 23:29:36 +08:00
if ( mode ) {
2014-09-16 22:53:07 +08:00
if ( strcmp ( mode , " intractg " ) = = 0 ) {
if ( ! opt0 . o_del ) opt - > o_del = 16 ;
if ( ! opt0 . o_ins ) opt - > o_ins = 16 ;
if ( ! opt0 . b ) opt - > b = 9 ;
if ( ! opt0 . pen_clip5 ) opt - > pen_clip5 = 5 ;
if ( ! opt0 . pen_clip3 ) opt - > pen_clip3 = 5 ;
2016-01-27 11:37:01 +08:00
} else if ( strcmp ( mode , " pacbio " ) = = 0 | | strcmp ( mode , " pbref " ) = = 0 | | strcmp ( mode , " ont2d " ) = = 0 ) {
2014-09-16 11:22:05 +08:00
if ( ! opt0 . o_del ) opt - > o_del = 1 ;
2014-04-15 04:04:29 +08:00
if ( ! opt0 . e_del ) opt - > e_del = 1 ;
2014-09-16 11:22:05 +08:00
if ( ! opt0 . o_ins ) opt - > o_ins = 1 ;
2014-04-15 04:04:29 +08:00
if ( ! opt0 . e_ins ) opt - > e_ins = 1 ;
2014-09-16 11:22:05 +08:00
if ( ! opt0 . b ) opt - > b = 1 ;
2014-04-07 23:29:36 +08:00
if ( opt0 . split_factor = = 0. ) opt - > split_factor = 10. ;
2016-01-27 11:37:01 +08:00
if ( strcmp ( mode , " ont2d " ) = = 0 ) {
2014-09-16 22:38:15 +08:00
if ( ! opt0 . min_chain_weight ) opt - > min_chain_weight = 20 ;
if ( ! opt0 . min_seed_len ) opt - > min_seed_len = 14 ;
if ( ! opt0 . pen_clip5 ) opt - > pen_clip5 = 0 ;
if ( ! opt0 . pen_clip3 ) opt - > pen_clip3 = 0 ;
2014-04-08 23:40:54 +08:00
} else {
2014-09-16 22:38:15 +08:00
if ( ! opt0 . min_chain_weight ) opt - > min_chain_weight = 40 ;
2014-04-08 23:40:54 +08:00
if ( ! opt0 . min_seed_len ) opt - > min_seed_len = 17 ;
if ( ! opt0 . pen_clip5 ) opt - > pen_clip5 = 0 ;
if ( ! opt0 . pen_clip3 ) opt - > pen_clip3 = 0 ;
}
2014-04-07 23:29:36 +08:00
} else {
fprintf ( stderr , " [E::%s] unknown read type '%s' \n " , __func__ , mode ) ;
return 1 ; // FIXME memory leak
}
} else update_a ( opt , & opt0 ) ;
2013-03-05 22:38:12 +08:00
bwa_fill_scmat ( opt - > a , opt - > b , opt - > mat ) ;
2014-04-07 23:29:36 +08:00
2014-11-27 01:32:48 +08:00
aux . idx = bwa_idx_load_from_shm ( argv [ optind ] ) ;
if ( aux . idx = = 0 ) {
if ( ( aux . idx = bwa_idx_load ( argv [ optind ] , BWA_IDX_ALL ) ) = = 0 ) return 1 ; // FIXME: memory leak
2014-10-16 03:44:06 +08:00
} else if ( bwa_verbose > = 3 )
fprintf ( stderr , " [M::%s] load the bwa index from shared memory \n " , __func__ ) ;
2014-09-28 11:21:50 +08:00
if ( ignore_alt )
2014-11-27 01:32:48 +08:00
for ( i = 0 ; i < aux . idx - > bns - > n_seqs ; + + i )
aux . idx - > bns - > anns [ i ] . is_alt = 0 ;
2011-11-25 08:15:14 +08:00
2013-02-24 06:09:23 +08:00
ko = kopen ( argv [ optind + 1 ] , & fd ) ;
2013-04-27 22:08:01 +08:00
if ( ko = = 0 ) {
if ( bwa_verbose > = 1 ) fprintf ( stderr , " [E::%s] fail to open file `%s'. \n " , __func__ , argv [ optind + 1 ] ) ;
return 1 ;
}
2013-02-24 06:09:23 +08:00
fp = gzdopen ( fd , " r " ) ;
2014-11-27 01:32:48 +08:00
aux . ks = kseq_init ( fp ) ;
2013-02-08 02:13:43 +08:00
if ( optind + 2 < argc ) {
2013-02-25 13:13:32 +08:00
if ( opt - > flag & MEM_F_PE ) {
if ( bwa_verbose > = 2 )
2014-11-19 03:30:22 +08:00
fprintf ( stderr , " [W::%s] when '-p' is in use, the second query file is ignored. \n " , __func__ ) ;
2013-02-25 13:13:32 +08:00
} else {
ko2 = kopen ( argv [ optind + 2 ] , & fd2 ) ;
2013-04-27 22:08:01 +08:00
if ( ko2 = = 0 ) {
if ( bwa_verbose > = 1 ) fprintf ( stderr , " [E::%s] fail to open file `%s'. \n " , __func__ , argv [ optind + 2 ] ) ;
return 1 ;
}
2013-02-25 13:13:32 +08:00
fp2 = gzdopen ( fd2 , " r " ) ;
2014-11-27 01:32:48 +08:00
aux . ks2 = kseq_init ( fp2 ) ;
2013-02-25 13:13:32 +08:00
opt - > flag | = MEM_F_PE ;
}
2013-02-08 02:13:43 +08:00
}
2016-01-27 11:37:01 +08:00
bwa_print_sam_hdr ( aux . idx - > bns , hdr_line ) ;
2014-11-27 01:32:48 +08:00
aux . actual_chunk_size = fixed_chunk_size > 0 ? fixed_chunk_size : opt - > chunk_size * opt - > n_threads ;
kt_pipeline ( no_mt_io ? 1 : 2 , process , & aux , 3 ) ;
2014-11-19 23:59:05 +08:00
free ( hdr_line ) ;
2013-02-24 04:55:55 +08:00
free ( opt ) ;
2014-11-27 01:32:48 +08:00
bwa_idx_destroy ( aux . idx ) ;
kseq_destroy ( aux . ks ) ;
2013-03-01 17:37:46 +08:00
err_gzclose ( fp ) ; kclose ( ko ) ;
2014-11-27 01:32:48 +08:00
if ( aux . ks2 ) {
kseq_destroy ( aux . ks2 ) ;
2013-03-01 17:37:46 +08:00
err_gzclose ( fp2 ) ; kclose ( ko2 ) ;
2013-02-08 02:13:43 +08:00
}
2013-02-01 02:59:48 +08:00
return 0 ;
2011-11-25 08:15:14 +08:00
}
2011-10-25 12:22:28 +08:00
int main_fastmap ( int argc , char * argv [ ] )
{
2014-08-25 23:59:27 +08:00
int c , i , min_iwidth = 20 , min_len = 17 , print_seq = 0 , min_intv = 1 , max_len = INT_MAX ;
uint64_t max_intv = 0 ;
2011-10-25 12:22:28 +08:00
kseq_t * seq ;
2011-10-26 03:06:13 +08:00
bwtint_t k ;
2011-10-25 12:22:28 +08:00
gzFile fp ;
2013-02-01 01:34:05 +08:00
smem_i * itr ;
2013-02-02 03:38:44 +08:00
const bwtintv_v * a ;
2013-02-24 04:55:55 +08:00
bwaidx_t * idx ;
2011-10-26 03:06:13 +08:00
2014-08-25 23:59:27 +08:00
while ( ( c = getopt ( argc , argv , " w:l:pi:I:L: " ) ) > = 0 ) {
2011-10-25 12:22:28 +08:00
switch ( c ) {
2013-02-02 03:20:38 +08:00
case ' p ' : print_seq = 1 ; break ;
2011-10-26 03:06:13 +08:00
case ' w ' : min_iwidth = atoi ( optarg ) ; break ;
case ' l ' : min_len = atoi ( optarg ) ; break ;
2014-08-25 23:59:27 +08:00
case ' i ' : min_intv = atoi ( optarg ) ; break ;
case ' I ' : max_intv = atol ( optarg ) ; break ;
case ' L ' : max_len = atoi ( optarg ) ; break ;
2013-04-29 20:58:28 +08:00
default : return 1 ;
2011-10-25 12:22:28 +08:00
}
}
if ( optind + 1 > = argc ) {
2014-08-25 23:59:27 +08:00
fprintf ( stderr , " \n " ) ;
fprintf ( stderr , " Usage: bwa fastmap [options] <idxbase> <in.fq> \n \n " ) ;
fprintf ( stderr , " Options: -l INT min SMEM length to output [%d] \n " , min_len ) ;
fprintf ( stderr , " -w INT max interval size to find coordiantes [%d] \n " , min_iwidth ) ;
fprintf ( stderr , " -i INT min SMEM interval size [%d] \n " , min_intv ) ;
2016-02-02 00:41:46 +08:00
fprintf ( stderr , " -L INT max MEM length [%d] \n " , max_len ) ;
2014-08-25 23:59:27 +08:00
fprintf ( stderr , " -I INT stop if MEM is longer than -l with a size less than INT [%ld] \n " , ( long ) max_intv ) ;
fprintf ( stderr , " \n " ) ;
2011-10-25 12:22:28 +08:00
return 1 ;
}
2011-10-26 03:06:13 +08:00
2013-01-04 00:57:37 +08:00
fp = xzopen ( argv [ optind + 1 ] , " r " ) ;
2011-10-25 12:22:28 +08:00
seq = kseq_init ( fp ) ;
2013-04-29 20:58:28 +08:00
if ( ( idx = bwa_idx_load ( argv [ optind ] , BWA_IDX_BWT | BWA_IDX_BNS ) ) = = 0 ) return 1 ;
2013-02-24 04:55:55 +08:00
itr = smem_itr_init ( idx - > bwt ) ;
2014-08-25 23:59:27 +08:00
smem_config ( itr , min_intv , max_len , max_intv ) ;
2011-10-25 12:22:28 +08:00
while ( kseq_read ( seq ) > = 0 ) {
2013-03-01 17:37:46 +08:00
err_printf ( " SQ \t %s \t %ld " , seq - > name . s , seq - > seq . l ) ;
2011-11-25 08:44:21 +08:00
if ( print_seq ) {
2013-01-09 22:43:36 +08:00
err_putchar ( ' \t ' ) ;
err_puts ( seq - > seq . s ) ;
} else err_putchar ( ' \n ' ) ;
2011-10-25 12:22:28 +08:00
for ( i = 0 ; i < seq - > seq . l ; + + i )
seq - > seq . s [ i ] = nst_nt4_table [ ( int ) seq - > seq . s [ i ] ] ;
2013-02-02 03:20:38 +08:00
smem_set_query ( itr , seq - > seq . l , ( uint8_t * ) seq - > seq . s ) ;
2014-04-04 03:23:48 +08:00
while ( ( a = smem_next ( itr ) ) ! = 0 ) {
2013-02-02 03:38:44 +08:00
for ( i = 0 ; i < a - > n ; + + i ) {
bwtintv_t * p = & a - > a [ i ] ;
2011-11-25 08:15:14 +08:00
if ( ( uint32_t ) p - > info - ( p - > info > > 32 ) < min_len ) continue ;
2013-03-01 17:37:46 +08:00
err_printf ( " EM \t %d \t %d \t %ld " , ( uint32_t ) ( p - > info > > 32 ) , ( uint32_t ) p - > info , ( long ) p - > x [ 2 ] ) ;
2011-11-25 08:15:14 +08:00
if ( p - > x [ 2 ] < = min_iwidth ) {
for ( k = 0 ; k < p - > x [ 2 ] ; + + k ) {
bwtint_t pos ;
int len , is_rev , ref_id ;
len = ( uint32_t ) p - > info - ( p - > info > > 32 ) ;
2013-02-24 04:55:55 +08:00
pos = bns_depos ( idx - > bns , bwt_sa ( idx - > bwt , p - > x [ 0 ] + k ) , & is_rev ) ;
2011-11-25 08:15:14 +08:00
if ( is_rev ) pos - = len - 1 ;
2013-02-24 04:55:55 +08:00
bns_cnt_ambi ( idx - > bns , pos , len , & ref_id ) ;
2013-03-01 17:37:46 +08:00
err_printf ( " \t %s:%c%ld " , idx - > bns - > anns [ ref_id ] . name , " +- " [ is_rev ] , ( long ) ( pos - idx - > bns - > anns [ ref_id ] . offset ) + 1 ) ;
2011-11-25 08:15:14 +08:00
}
2013-01-09 22:43:36 +08:00
} else err_puts ( " \t * " ) ;
err_putchar ( ' \n ' ) ;
2011-10-26 03:06:13 +08:00
}
2011-10-25 12:22:28 +08:00
}
2013-01-09 22:43:36 +08:00
err_puts ( " // " ) ;
2011-10-25 12:22:28 +08:00
}
2011-10-26 03:06:13 +08:00
2013-02-01 01:34:05 +08:00
smem_itr_destroy ( itr ) ;
2013-02-24 04:55:55 +08:00
bwa_idx_destroy ( idx ) ;
2011-10-25 12:22:28 +08:00
kseq_destroy ( seq ) ;
2013-01-04 00:57:37 +08:00
err_gzclose ( fp ) ;
2011-10-25 12:22:28 +08:00
return 0 ;
}