fixed a long-existing bug in trimming

This commit is contained in:
Heng Li 2011-11-23 23:11:50 -05:00
parent 84aa3fa696
commit dec584d50b
4 changed files with 9 additions and 10 deletions

6
bwa.1
View File

@ -46,7 +46,7 @@ command. It works for single-end reads only.
.SH COMMANDS AND OPTIONS .SH COMMANDS AND OPTIONS
.TP .TP
.B index .B index
bwa index [-p prefix] [-a algoType] [-c] <in.db.fasta> bwa index [-p prefix] [-a algoType] <in.db.fasta>
Index database sequences in the FASTA format. Index database sequences in the FASTA format.
@ -54,7 +54,7 @@ Index database sequences in the FASTA format.
.RS .RS
.TP 10 .TP 10
.B -c .B -c
Build color-space index. The input fast should be in nucleotide space. Build color-space index. The input fast should be in nucleotide space. (Disabled since 0.6.x)
.TP .TP
.BI -p \ STR .BI -p \ STR
Prefix of the output database [same as db filename] Prefix of the output database [same as db filename]
@ -142,7 +142,7 @@ especially for short reads (~32bp).
.TP .TP
.B -c .B -c
Reverse query but not complement it, which is required for alignment in Reverse query but not complement it, which is required for alignment in
the color space. the color space. (Disabled since 0.6.x)
.TP .TP
.B -N .B -N
Disable iterative search. All hits with no more than Disable iterative search. All hits with no more than

View File

@ -73,16 +73,14 @@ void seq_reverse(int len, ubyte_t *seq, int is_comp)
int bwa_trim_read(int trim_qual, bwa_seq_t *p) int bwa_trim_read(int trim_qual, bwa_seq_t *p)
{ {
int s = 0, l, max = 0, max_l = p->len - 1; int s = 0, l, max = 0, max_l = p->len;
if (trim_qual < 1 || p->qual == 0) return 0; if (trim_qual < 1 || p->qual == 0) return 0;
for (l = p->len - 1; l >= BWA_MIN_RDLEN - 1; --l) { for (l = p->len - 1; l >= BWA_MIN_RDLEN - 1; --l) {
s += trim_qual - (p->qual[l] - 33); s += trim_qual - (p->qual[l] - 33);
if (s < 0) break; if (s < 0) break;
if (s > max) { if (s > max) max = s, max_l = l;
max = s; max_l = l;
} }
} p->clip_len = p->len = max_l;
p->clip_len = p->len = max_l + 1;
return p->full_len - p->len; return p->full_len - p->len;
} }

View File

@ -283,7 +283,7 @@ int bwa_aln(int argc, char *argv[])
fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual); fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual);
fprintf(stderr, " -f FILE file to write output to instead of stdout\n"); fprintf(stderr, " -f FILE file to write output to instead of stdout\n");
fprintf(stderr, " -B INT length of barcode\n"); fprintf(stderr, " -B INT length of barcode\n");
fprintf(stderr, " -c input sequences are in the color space\n"); // fprintf(stderr, " -c input sequences are in the color space\n");
fprintf(stderr, " -L log-scaled gap penalty for long deletions\n"); fprintf(stderr, " -L log-scaled gap penalty for long deletions\n");
fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n"); fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n");
fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n"); fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n");

View File

@ -65,7 +65,8 @@ int bwa_index(int argc, char *argv[])
fprintf(stderr, "Usage: bwa index [-a bwtsw|div|is] [-c] <in.fasta>\n\n"); fprintf(stderr, "Usage: bwa index [-a bwtsw|div|is] [-c] <in.fasta>\n\n");
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [is]\n"); fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [is]\n");
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
fprintf(stderr, " -c build color-space index\n\n"); // fprintf(stderr, " -c build color-space index\n");
fprintf(stderr, "\n");
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n"); fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
fprintf(stderr, " according to the length of the genome.\n\n"); fprintf(stderr, " according to the length of the genome.\n\n");