diff --git a/README b/README
deleted file mode 100644
index dd1d335..0000000
--- a/README
+++ /dev/null
@@ -1,36 +0,0 @@
-Released packages can be downloaded from SourceForge.net:
-
-  http://sourceforge.net/projects/bio-bwa/files/
-
-Introduction and FAQ are available at:
-
-  http://bio-bwa.sourceforge.net
-
-Manual page at:
-
-  http://bio-bwa.sourceforge.net/bwa.shtml
-
-Mailing list:
-
-  bio-bwa-help@lists.sourceforge.net
-
-To sign up:
-
-  http://sourceforge.net/mail/?group_id=276243
-
-Publications (Open Access):
-
-  http://www.ncbi.nlm.nih.gov/pubmed/20080505
-  http://www.ncbi.nlm.nih.gov/pubmed/19451168
-
-Incomplete list of citations (via HubMed.org):
-
-  http://www.hubmed.org/references.cgi?uids=20080505
-  http://www.hubmed.org/references.cgi?uids=19451168
-
-Related projects:
-
-  http://pbwa.sourceforge.net/
-  http://www.many-core.group.cam.ac.uk/projects/lam.shtml
-  http://biodoop-seal.sourceforge.net/
-  http://gitorious.org/bwa-cuda
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a46a691
--- /dev/null
+++ b/README.md
@@ -0,0 +1,73 @@
+###Getting started
+
+	git clone https://github.com/lh3/bwa.git
+	cd bwa; make
+	./bwa index ref.fa
+	./bwa mem ref.fa read-se.fq.gz | gzip -3 > aln-se.sam.gz
+	./bwa mem ref.fa read1.fq read2.fq | gzip -3 > aln-pe.sam.gz
+
+###Introduction
+
+BWA is a software package for mapping low-divergent sequences against a large
+reference genome, such as the human genome. It consists of three algorithms:
+BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is designed for Illumina
+sequence reads up to 100bp, while the other two are for longer sequences ranging from
+70bp to 1Mbp. BWA-MEM and BWA-SW share similar features such as the support of
+long reads and chimeric alignment, but BWA-MEM, which is the latest, is
+generally recommended for high-quality queries as it is faster and more
+accurate. BWA-MEM also has better performance than BWA-backtrack for 70-100bp
+Illumina reads.
+
+For all the algorithms, BWA first needs to construct the FM-index for the
+reference genome (the **index** command). Alignment algorithms are invoked with
+different sub-commands: **aln**/**samse**/**sampe** for BWA-backtrack,
+**bwasw** for BWA-SW and **mem** for the BWA-MEM algorithm.
+
+###Availability
+
+BWA is released under [GPLv3][1]. The latest source code is [freely
+available][2] at GitHub. Released packages can [be downloaded][3] at
+SourceForge. After you acquire the source code, simply use `make` to compile
+and copy the single executable `bwa` to the destination you want.
+
+###Seeking help
+
+The detailed usage is described in the man page available together with the
+source code. You can use `man ./bwa.1` to view the man page in a terminal. The
+[HTML version][4] of the man page can be found at the [BWA website][5]. If you
+have questions about BWA, you may [sign up for the mailing list][6] and then send
+the questions to [bio-bwa-help@sourceforge.net][7]. You may also ask questions
+in forums such as [BioStar][8] and [SEQanswers][9].
+
+###Citing BWA
+
+* Li H. and Durbin R. (2009) Fast and accurate short read alignment with
+ Burrows-Wheeler transform. *Bioinformatics*, **25**, 1754-1760. [PMID:
+ [19451168][10]]. (if you use the BWA-backtrack algorithm)
+
+* Li H. and Durbin R. (2010) Fast and accurate long-read alignment with
+ Burrows-Wheeler transform. *Bioinformatics*, **26**, 589-595. [PMID:
+ [20080505][11]]. (if you use the BWA-SW algorithm)
+
+* Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs
+ with BWA-MEM. [arXiv:1303.3997v1][12] [q-bio.GN]. (if you use the BWA-MEM
+ algorithm or the **fastmap** command)
+
+Please note that the last reference is a preprint hosted at [arXiv.org][13]. I
+do not plan to submit it to a peer-reviewed journal in the near future.
+
+
+
+[1]: http://en.wikipedia.org/wiki/GNU_General_Public_License
+[2]: https://github.com/lh3/bwa
+[3]: http://sourceforge.net/projects/bio-bwa/files/
+[4]: http://bio-bwa.sourceforge.net/bwa.shtml
+[5]: http://bio-bwa.sourceforge.net/
+[6]: https://lists.sourceforge.net/lists/listinfo/bio-bwa-help
+[7]: mailto:bio-bwa-help@sourceforge.net
+[8]: http://biostars.org
+[9]: http://seqanswers.com/
+[10]: http://www.ncbi.nlm.nih.gov/pubmed/19451168
+[11]: http://www.ncbi.nlm.nih.gov/pubmed/20080505
+[12]: http://arxiv.org/abs/1303.3997
+[13]: http://arxiv.org/
diff --git a/bwa.1 b/bwa.1
index d25ba4a..e63fe8d 100644
--- a/bwa.1
+++ b/bwa.1
@@ -1,4 +1,4 @@
-.TH bwa 1 "23 April 2013" "bwa-0.7.4" "Bioinformatics tools"
+.TH bwa 1 "24 May 2013" "bwa-0.7.5" "Bioinformatics tools"
 .SH NAME
 .PP
 bwa - Burrows-Wheeler Alignment Tool
@@ -718,12 +718,13 @@ If you use the BWA-SW algorithm, please cite:
 Li H. and Durbin R. (2010) Fast and accurate long-read alignment with
 Burrows-Wheeler transform. Bioinformatics, 26, 589-595. [PMID: 20080505]
 .PP
-If you use the fastmap component of BWA, please cite:
+If you use BWA-MEM or the fastmap component of BWA, please cite:
 .PP
-Li H. (2012) Exploring single-sample SNP and INDEL calling with whole-genome de
-novo assembly. Bioinformatics, 28, 1838-1844. [PMID: 22569178]
+Li H. (2013) Aligning sequence reads, clone sequences and assembly contigs with
+BWA-MEM. arXiv:1303.3997v1 [q-bio.GN].
 .PP
-The BWA-MEM algorithm has not been published yet.
+It is likely that the BWA-MEM manuscript will not appear in a peer-reviewed
+journal.
 
 .SH HISTORY
 BWA is largely influenced by BWT-SW. It uses source codes from BWT-SW
diff --git a/bwase.c b/bwase.c
index dcf29bf..5bb8116 100644
--- a/bwase.c
+++ b/bwase.c
@@ -167,20 +167,29 @@ void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_se
 
 #define SW_BW 50
 
-bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, ubyte_t *seq, int ref_shift, bwtint_t rb, int *n_cigar)
+bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, ubyte_t *seq, int ref_shift, bwtint_t *_rb, int *n_cigar)
 {
 	bwa_cigar_t *cigar = 0;
 	uint32_t *cigar32 = 0;
 	ubyte_t *rseq;
-	int64_t k, re, rlen;
+	int64_t k, rb, re, rlen;
 	int8_t mat[25];
 
 	bwa_fill_scmat(1, 3, mat);
-	re = rb + len + ref_shift;
+	rb = *_rb; re = rb + len + ref_shift;
 	assert(re <= l_pac);
 	rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
 	assert(re - rb == rlen);
-	ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension
+	ksw_global(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32);
+	assert(*n_cigar > 0);
+	if ((cigar32[*n_cigar - 1]&0xf) == 1) cigar32[*n_cigar - 1] = (cigar32[*n_cigar - 1]>>4<<4) | 4; // change endding ins to soft clipping
+	if ((cigar32[0]&0xf) == 1) cigar32[0] = (cigar32[0]>>4<<4) | 4; // change beginning ins to soft clipping
+	if ((cigar32[*n_cigar - 1]&0xf) == 2) --*n_cigar; // delete endding del
+	if ((cigar32[0]&0xf) == 2) { // delete beginning del
+		*_rb += cigar32[0]>>4;
+		--*n_cigar;
+		memmove(cigar32, cigar32+1, (*n_cigar) * 4);
+	}
 	cigar = (bwa_cigar_t*)cigar32;
 	for (k = 0; k < *n_cigar; ++k)
 		cigar[k] = __cigar_create((cigar32[k]&0xf), (cigar32[k]>>4));
@@ -292,14 +301,14 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
 			bwt_multi1_t *q = s->multi + j;
 			int n_cigar;
 			if (q->gap) { // gapped alignment
-				q->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, q->ref_shift, q->pos, &n_cigar);
+				q->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, q->ref_shift, &q->pos, &n_cigar);
 				q->n_cigar = n_cigar;
 				if (q->cigar) s->multi[k++] = *q;
 			} else s->multi[k++] = *q;
 		}
 		s->n_multi = k; // this squeezes out gapped alignments which failed the CIGAR generation
 		if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
-		s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, s->ref_shift, s->pos, &s->n_cigar);
+		s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, s->ref_shift, &s->pos, &s->n_cigar);
 		if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
 	}
 	// generate MD tag