#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "thread_mem.h"
#include "common.h"

/*
 * Branch-prediction hints.
 * When __GNUC__ is defined the condition is passed through __builtin_expect
 * with the expected outcome (1 for LIKELY, 0 for UNLIKELY); otherwise both
 * macros simply expand to their parenthesised argument.
 */
#if !defined(__GNUC__)
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#else
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
#endif

/*
 * One cell of the rolling score row used by ksw_extend_normal.
 * At the start of an inner-loop iteration, eh[j] holds { H(i-1,j-1), E(i,j) }.
 */
typedef struct
{
	int32_t h; // H score carried over from the previous row
	int32_t e; // E score (gap that consumes only the target)
} eh_t;

/**
 * Banded dynamic-programming extension of an alignment that starts with score h0.
 *
 * The query is scored against the target row by row (one target character per
 * row) using an affine gap model, keeping only one row of {H, E} cells in memory.
 *
 * @param tmem      scratch allocator; two buffers are requested from it and
 *                  released again before returning
 * @param qlen      number of characters in query
 * @param query     query sequence; each value is used as a column index into a
 *                  row of mat, so it must be in [0, m)
 * @param tlen      number of characters in target
 * @param target    target sequence; each value selects a row of the query
 *                  profile, so it must be in [0, m)
 * @param m         alphabet size; mat is an m * m scoring matrix
 * @param mat       scoring matrix, row-major, m * m entries
 * @param o_del     gap-open penalty for a deletion (gap consuming the target)
 * @param e_del     gap-extension penalty for a deletion
 * @param o_ins     gap-open penalty for an insertion (gap consuming the query)
 * @param e_ins     gap-extension penalty for an insertion
 * @param w         band width; reduced internally if larger than the longest
 *                  gap the scores could pay for
 * @param end_bonus only used when shrinking w
 * @param zdrop     Z-drop threshold; currently has no effect because the branch
 *                  that uses it is disabled with `else if (0)`
 * @param h0        initial score; must be > 0 (asserted)
 * @param _qle      if non-NULL, receives max_j + 1 (query end of the best cell)
 * @param _tle      if non-NULL, receives max_i + 1 (target end of the best cell)
 * @param _gtle     if non-NULL, receives max_ie + 1 (target end of the best
 *                  score recorded on a row whose scan stopped at j == qlen)
 * @param _gscore   if non-NULL, receives that best end-of-query score, or -1 if
 *                  no row reached the end of the query
 * @param _max_off  if non-NULL, receives the largest |mj - i| seen whenever the
 *                  running maximum improved
 * @return the best score found (at least h0)
 */
int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off)
{
	eh_t *eh;	// score array: qlen + 1 cells (one per query column plus one past the end)
	int8_t *qp; // query profile
	int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
	// Buffer sizes are computed once so the request and the release always agree.
	const int qp_bytes = qlen * m;
	const int eh_bytes = (qlen + 1) * (int)sizeof(*eh);
	assert(h0 > 0);
	// Scratch buffers come from tmem. The DP below relies on every eh cell that
	// is not explicitly initialised being 0, so eh uses the "and_clean" variant
	// (presumably zero-filling, like the calloc it replaced -- confirm in thread_mem).
	qp = thread_mem_request(tmem, qp_bytes);
	eh = thread_mem_request_and_clean(tmem, eh_bytes);
	// generate the query profile:
	// qp[k * qlen + j] is the score of query[j] against character code k, so
	// entries 0..qlen-1 belong to code 0, qlen..2*qlen-1 to code 1, and so on
	for (k = i = 0; k < m; ++k)
	{
		const int8_t *p = &mat[k * m];
		for (j = 0; j < qlen; ++j)
			qp[i++] = p[query[j]];
	}
	// fill the first row: h0, then h0 minus a growing insertion penalty until it reaches 0
	eh[0].h = h0;
	if (qlen > 0) // eh has only qlen + 1 cells, so eh[1] does not exist for an empty query
		eh[1].h = h0 > oe_ins ? h0 - oe_ins : 0;
	for (j = 2; j <= qlen && eh[j - 1].h > e_ins; ++j)
		eh[j].h = eh[j - 1].h - e_ins;
	// adjust $w if it is too large
	k = m * m;						 // number of entries in the scoring matrix
	for (i = 0, max = 0; i < k; ++i) // get the max score
		max = max > mat[i] ? max : mat[i];
	// longest insertion the best possible score could still pay for
	max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
	max_ins = max_ins > 1 ? max_ins : 1;
	w = w < max_ins ? w : max_ins;
	// same bound for deletions
	max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
	max_del = max_del > 1 ? max_del : 1;
	w = w < max_del ? w : max_del; // TODO: is this necessary? (the lines above only try to narrow the band to save work)
	// DP loop
	max = h0, max_i = max_j = -1;
	max_ie = -1, gscore = -1;
	max_off = 0;
	beg = 0, end = qlen;
#ifdef DEBUG_OUT
	fprintf(score_f_arr[0], "%-3d", h0);
	fprintf(ins_ext_f_arr[0], "%-3d", h0);
	fprintf(del_ext_f_arr[0], "%-3d", h0);
	for (j = 0; LIKELY(j < end); ++j) // walk over the query characters
	{
		fprintf(score_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0));
		fprintf(ins_ext_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0));
		fprintf(del_ext_f_arr[0], "%-3d", 0);
	}
	fprintf(score_f_arr[0], "\n");
	fprintf(ins_ext_f_arr[0], "\n");
	fprintf(del_ext_f_arr[0], "\n");
#endif
	for (i = 0; LIKELY(i < tlen); ++i) // one row per target character
	{
		// row_max / mj: best H in this row and the query column where it occurred
		int t, f = 0, h1, row_max = 0, mj = -1;
		// slice of the query profile for target[i]: q[j] is the score of query[j] against it
		int8_t *q = &qp[target[i] * qlen];
		// apply the band and the constraint (if provided)
		if (beg < i - w) // the band no longer covers the leftmost columns
			beg = i - w;
		if (end > i + w + 1) // the band does not yet cover the rightmost columns
			end = i + w + 1;
		if (end > qlen) // never run past the end of the query
			end = qlen;
		// compute the first column
		if (beg == 0)
		{
			// only target characters consumed so far: one deletion of length i + 1
			h1 = h0 - (o_del + e_del * (i + 1));
			if (h1 < 0)
				h1 = 0;
		}
		else
			h1 = 0;
#ifdef DEBUG_OUT
		fprintf(ins_ext_f_arr[0], "%-3d", 0);
		fprintf(del_ext_f_arr[0], "%-3d", MAX(h0 - o_del - (i + 1) * e_del, 0));
#endif
		for (j = beg; LIKELY(j < end); ++j) // walk over the query characters inside the band
		{
#ifdef DEBUG_OUT
			fprintf(score_f_arr[0], "%-3d", h1);
			fprintf(ins_ext_f_arr[0], "%-3d", f);
			fprintf(del_ext_f_arr[0], "%-3d", eh[j].e);
#endif
			// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
			// Similar to SSE2-SW, cells are computed in the following order:
			// H(i,j)   = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
			// E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape   (E: deletion, consumes only the target)
			// F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape   (F: insertion, consumes only the query)
			eh_t *p = &eh[j];
			int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
			p->h = h1;				   // set H(i,j-1) for the next row
			M = M ? M + q[j] : 0;	   // separating H and M to disallow a cigar like "100M3I3D20M"; M may become negative here
			h = M > e ? M : e;		   // e and f are guaranteed to be non-negative, so h>=0 even if M<0
			h = h > f ? h : f;
			h1 = h;								 // save H(i,j) to h1 for the next column
			mj = row_max > h ? mj : j;			 // record the position where max score is achieved
			row_max = row_max > h ? row_max : h; // row_max is stored at eh[mj+1]
			t = M - oe_del;						 // score of opening a deletion right after a (mis)match at (i,j)
			t = t > 0 ? t : 0;
			e -= e_del;		   // score of extending the existing deletion
			e = e > t ? e : t; // computed E(i+1,j)
			p->e = e;		   // save E(i+1,j) for the next row
			t = M - oe_ins;	   // score of opening an insertion right after a (mis)match at (i,j)
			t = t > 0 ? t : 0;
			f -= e_ins; // score of extending the existing insertion
			f = f > t ? f : t; // computed F(i,j+1)
		}
#ifdef DEBUG_OUT
		fprintf(score_f_arr[0], "%-3d", h1);
		fprintf(score_f_arr[0], "\n");
		fprintf(ins_ext_f_arr[0], "\n");
		fprintf(del_ext_f_arr[0], "\n");
#endif
		eh[end].h = h1; // eh[end] is the cell just past the last column processed in this row
		eh[end].e = 0;
		if (j == qlen) // this row's scan stopped at the end of the query
		{
			max_ie = gscore > h1 ? max_ie : i; // target row where the best end-of-query score was seen
			gscore = gscore > h1 ? gscore : h1;
		}
		if (row_max == 0) // nothing in this row scored above 0: stop extending
			break;
		if (row_max > max) // this row improved on the best score so far
		{
			max = row_max, max_i = i, max_j = mj;					 // remember where the best score was reached
			max_off = max_off > abs(mj - i) ? max_off : abs(mj - i); // largest distance from the main diagonal at an improvement
		}
		else if (0) // Z-drop termination is disabled; the original condition was (zdrop > 0)
		{
			if (i - max_i > mj - max_j)
			{
				// drop from the best score, discounted by the deletion length since then, exceeds zdrop
				if (max - row_max - ((i - max_i) - (mj - max_j)) * e_del > zdrop)
					break;
			}
			else
			{
				// same test with the insertion length since the best cell
				if (max - row_max - ((mj - max_j) - (i - max_i)) * e_ins > zdrop)
					break;
			}
		}
		// update beg and end for the next round: skip leading and trailing cells that are all zero
		for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j)
			;
		beg = j;
		for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j)
			;
		end = j + 2 < qlen ? j + 2 : qlen; // note: this pruning does not take f (insertions) into account
										   // beg = 0, end = qlen;			   // uncomment this line for debugging
	}
	// hand both scratch buffers back to tmem in a single call
	thread_mem_release(tmem, qp_bytes + eh_bytes);
	if (_qle)
		*_qle = max_j + 1;
	if (_tle)
		*_tle = max_i + 1;
	if (_gtle)
		*_gtle = max_ie + 1;
	if (_gscore)
		*_gscore = gscore;
	if (_max_off)
		*_max_off = max_off;
	return max;
}