From 9d77db3c730780c8ef5ccd4b66403ff5675cfe4e Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Mon, 13 May 2024 10:30:14 +0000
Subject: [PATCH] RDMA/hns: refactor MR code to use the generic mtr interface

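Rework hns_roce_mr.c around the generic mtr (memory translate region)
interface: rename the SW2HW/HW2SW MPT mailbox helpers to
hns_roce_hw_create_mpt()/hns_roce_hw_destroy_mpt(), drop the buddy
allocator, the hns_roce_mtt_* helpers and the hand-rolled PBL multi-hop
alloc/free paths, and split MR setup into alloc_mr_key() and
alloc_mr_pbl(). MR and FRMR page tables are now built through
hns_roce_mtr_create()/hns_roce_mtr_destroy(), and hns_roce_mtr_map()/
hns_roce_mtr_find() write and look up buffer page addresses in the BA
tables.

A minimal sketch of the consumer flow added here (all names are taken
from this patch; the attribute values are illustrative only):

	struct hns_roce_buf_attr buf_attr = {};
	struct hns_roce_mtr mtr;
	int ret;

	/* describe one region covering the whole buffer */
	buf_attr.page_shift = hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
	buf_attr.region[0].size = length;
	buf_attr.region[0].hopnum = hr_dev->caps.pbl_hop_num;
	buf_attr.region_count = 1;

	/* allocate the buffer (umem or kmem) and its BA tables */
	ret = hns_roce_mtr_create(hr_dev, &mtr, &buf_attr,
				  hr_dev->caps.pbl_ba_pg_sz +
				  HNS_HW_PAGE_SHIFT,
				  udata, start);
	if (ret)
		return ret;

	/* use the mtr, e.g. hns_roce_mtr_find(), then tear it down */
	hns_roce_mtr_destroy(hr_dev, &mtr);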
---
 kernel/drivers/infiniband/hw/hns/hns_roce_mr.c | 1790 ++++++++++++++++++++++++++++-------------------------------
 1 file changed, 851 insertions(+), 939 deletions(-)

diff --git a/kernel/drivers/infiniband/hw/hns/hns_roce_mr.c b/kernel/drivers/infiniband/hw/hns/hns_roce_mr.c
index c68596d..d5b3b10 100644
--- a/kernel/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/kernel/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -47,643 +47,152 @@
 {
 	return (key << 24) | (key >> 8);
 }
-EXPORT_SYMBOL_GPL(key_to_hw_index);
 
-static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
-			      struct hns_roce_cmd_mailbox *mailbox,
-			      unsigned long mpt_index)
+static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev,
+				  struct hns_roce_cmd_mailbox *mailbox,
+				  unsigned long mpt_index)
 {
 	return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
-				 HNS_ROCE_CMD_SW2HW_MPT,
+				 HNS_ROCE_CMD_CREATE_MPT,
 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
 
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
-			      struct hns_roce_cmd_mailbox *mailbox,
-			      unsigned long mpt_index)
+int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_cmd_mailbox *mailbox,
+			    unsigned long mpt_index)
 {
 	return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
-				 mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
+				 mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT,
 				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }
-EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt);
 
-static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
-				unsigned long *seg)
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+			u32 pd, u64 iova, u64 size, u32 access)
 {
-	int o;
-	u32 m;
-
-	spin_lock(&buddy->lock);
-
-	for (o = order; o <= buddy->max_order; ++o) {
-		if (buddy->num_free[o]) {
-			m = 1 << (buddy->max_order - o);
-			*seg = find_first_bit(buddy->bits[o], m);
-			if (*seg < m)
-				goto found;
-		}
-	}
-	spin_unlock(&buddy->lock);
-	return -1;
-
- found:
-	clear_bit(*seg, buddy->bits[o]);
-	--buddy->num_free[o];
-
-	while (o > order) {
-		--o;
-		*seg <<= 1;
-		set_bit(*seg ^ 1, buddy->bits[o]);
-		++buddy->num_free[o];
-	}
-
-	spin_unlock(&buddy->lock);
-
-	*seg <<= order;
-	return 0;
-}
-
-static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
-				int order)
-{
-	seg >>= order;
-
-	spin_lock(&buddy->lock);
-
-	while (test_bit(seg ^ 1, buddy->bits[order])) {
-		clear_bit(seg ^ 1, buddy->bits[order]);
-		--buddy->num_free[order];
-		seg >>= 1;
-		++order;
-	}
-
-	set_bit(seg, buddy->bits[order]);
-	++buddy->num_free[order];
-
-	spin_unlock(&buddy->lock);
-}
-
-static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
-{
-	int i, s;
-
-	buddy->max_order = max_order;
-	spin_lock_init(&buddy->lock);
-	buddy->bits = kcalloc(buddy->max_order + 1,
-			      sizeof(*buddy->bits),
-			      GFP_KERNEL);
-	buddy->num_free = kcalloc(buddy->max_order + 1,
-				  sizeof(*buddy->num_free),
-				  GFP_KERNEL);
-	if (!buddy->bits || !buddy->num_free)
-		goto err_out;
-
-	for (i = 0; i <= buddy->max_order; ++i) {
-		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
-		buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
-					 __GFP_NOWARN);
-		if (!buddy->bits[i]) {
-			buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
-			if (!buddy->bits[i])
-				goto err_out_free;
-		}
-	}
-
-	set_bit(0, buddy->bits[buddy->max_order]);
-	buddy->num_free[buddy->max_order] = 1;
-
-	return 0;
-
-err_out_free:
-	for (i = 0; i <= buddy->max_order; ++i)
-		kvfree(buddy->bits[i]);
-
-err_out:
-	kfree(buddy->bits);
-	kfree(buddy->num_free);
-	return -ENOMEM;
-}
-
-static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
-{
-	int i;
-
-	for (i = 0; i <= buddy->max_order; ++i)
-		kvfree(buddy->bits[i]);
-
-	kfree(buddy->bits);
-	kfree(buddy->num_free);
-}
-
-static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
-				    unsigned long *seg, u32 mtt_type)
-{
-	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-	struct hns_roce_hem_table *table;
-	struct hns_roce_buddy *buddy;
-	int ret;
-
-	if (mtt_type == MTT_TYPE_WQE) {
-		buddy = &mr_table->mtt_buddy;
-		table = &mr_table->mtt_table;
-	} else {
-		buddy = &mr_table->mtt_cqe_buddy;
-		table = &mr_table->mtt_cqe_table;
-	}
-
-	ret = hns_roce_buddy_alloc(buddy, order, seg);
-	if (ret == -1)
-		return -1;
-
-	if (hns_roce_table_get_range(hr_dev, table, *seg,
-				     *seg + (1 << order) - 1)) {
-		hns_roce_buddy_free(buddy, *seg, order);
-		return -1;
-	}
-
-	return 0;
-}
-
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
-		      struct hns_roce_mtt *mtt)
-{
-	int ret;
-	int i;
-
-	/* Page num is zero, correspond to DMA memory register */
-	if (!npages) {
-		mtt->order = -1;
-		mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
-		return 0;
-	}
-
-	/* Note: if page_shift is zero, FAST memory register */
-	mtt->page_shift = page_shift;
-
-	/* Compute MTT entry necessary */
-	for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
-	     i <<= 1)
-		++mtt->order;
-
-	/* Allocate MTT entry */
-	ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
-				       mtt->mtt_type);
-	if (ret == -1)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
-{
-	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
-	if (mtt->order < 0)
-		return;
-
-	if (mtt->mtt_type == MTT_TYPE_WQE) {
-		hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
-				    mtt->order);
-		hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
-					mtt->first_seg,
-					mtt->first_seg + (1 << mtt->order) - 1);
-	} else {
-		hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
-				    mtt->order);
-		hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
-					mtt->first_seg,
-					mtt->first_seg + (1 << mtt->order) - 1);
-	}
-}
-EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
-
-static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
-			       struct hns_roce_mr *mr, int err_loop_index,
-			       int loop_i, int loop_j)
-{
-	struct device *dev = hr_dev->dev;
-	u32 mhop_num;
-	u32 pbl_bt_sz;
-	u64 bt_idx;
-	int i, j;
-
-	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	mhop_num = hr_dev->caps.pbl_hop_num;
-
-	i = loop_i;
-	if (mhop_num == 3 && err_loop_index == 2) {
-		for (; i >= 0; i--) {
-			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
-					  mr->pbl_l1_dma_addr[i]);
-
-			for (j = 0; j < pbl_bt_sz / 8; j++) {
-				if (i == loop_i && j >= loop_j)
-					break;
-
-				bt_idx = i * pbl_bt_sz / 8 + j;
-				dma_free_coherent(dev, pbl_bt_sz,
-						  mr->pbl_bt_l2[bt_idx],
-						  mr->pbl_l2_dma_addr[bt_idx]);
-			}
-		}
-	} else if (mhop_num == 3 && err_loop_index == 1) {
-		for (i -= 1; i >= 0; i--) {
-			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
-					  mr->pbl_l1_dma_addr[i]);
-
-			for (j = 0; j < pbl_bt_sz / 8; j++) {
-				bt_idx = i * pbl_bt_sz / 8 + j;
-				dma_free_coherent(dev, pbl_bt_sz,
-						  mr->pbl_bt_l2[bt_idx],
-						  mr->pbl_l2_dma_addr[bt_idx]);
-			}
-		}
-	} else if (mhop_num == 2 && err_loop_index == 1) {
-		for (i -= 1; i >= 0; i--)
-			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
-					  mr->pbl_l1_dma_addr[i]);
-	} else {
-		dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.",
-			 mhop_num, err_loop_index);
-		return;
-	}
-
-	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
-	mr->pbl_bt_l0 = NULL;
-	mr->pbl_l0_dma_addr = 0;
-}
-
-/* PBL multi hop addressing */
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
-			       struct hns_roce_mr *mr)
-{
-	struct device *dev = hr_dev->dev;
-	int mr_alloc_done = 0;
-	int npages_allocated;
-	int i = 0, j = 0;
-	u32 pbl_bt_sz;
-	u32 mhop_num;
-	u64 pbl_last_bt_num;
-	u64 pbl_bt_cnt = 0;
-	u64 bt_idx;
-	u64 size;
-
-	mhop_num = hr_dev->caps.pbl_hop_num;
-	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
-
-	if (mhop_num == HNS_ROCE_HOP_NUM_0)
-		return 0;
-
-	/* hop_num = 1 */
-	if (mhop_num == 1) {
-		if (npages > pbl_bt_sz / 8) {
-			dev_err(dev, "npages %d is larger than buf_pg_sz!",
-				npages);
-			return -EINVAL;
-		}
-		mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-						 &(mr->pbl_dma_addr),
-						 GFP_KERNEL);
-		if (!mr->pbl_buf)
-			return -ENOMEM;
-
-		mr->pbl_size = npages;
-		mr->pbl_ba = mr->pbl_dma_addr;
-		mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
-		mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
-		mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
-		return 0;
-	}
-
-	mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
-				      sizeof(*mr->pbl_l1_dma_addr),
-				      GFP_KERNEL);
-	if (!mr->pbl_l1_dma_addr)
-		return -ENOMEM;
-
-	mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
-				GFP_KERNEL);
-	if (!mr->pbl_bt_l1)
-		goto err_kcalloc_bt_l1;
-
-	if (mhop_num == 3) {
-		mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
-					      sizeof(*mr->pbl_l2_dma_addr),
-					      GFP_KERNEL);
-		if (!mr->pbl_l2_dma_addr)
-			goto err_kcalloc_l2_dma;
-
-		mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
-					sizeof(*mr->pbl_bt_l2),
-					GFP_KERNEL);
-		if (!mr->pbl_bt_l2)
-			goto err_kcalloc_bt_l2;
-	}
-
-	/* alloc L0 BT */
-	mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
-					   &(mr->pbl_l0_dma_addr),
-					   GFP_KERNEL);
-	if (!mr->pbl_bt_l0)
-		goto err_dma_alloc_l0;
-
-	if (mhop_num == 2) {
-		/* alloc L1 BT */
-		for (i = 0; i < pbl_bt_sz / 8; i++) {
-			if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
-				size = pbl_bt_sz;
-			} else {
-				npages_allocated = i * (pbl_bt_sz / 8);
-				size = (npages - npages_allocated) * 8;
-			}
-			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
-						    &(mr->pbl_l1_dma_addr[i]),
-						    GFP_KERNEL);
-			if (!mr->pbl_bt_l1[i]) {
-				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
-				goto err_dma_alloc_l0;
-			}
-
-			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
-			pbl_bt_cnt++;
-			if (pbl_bt_cnt >= pbl_last_bt_num)
-				break;
-		}
-	} else if (mhop_num == 3) {
-		/* alloc L1, L2 BT */
-		for (i = 0; i < pbl_bt_sz / 8; i++) {
-			mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
-						    &(mr->pbl_l1_dma_addr[i]),
-						    GFP_KERNEL);
-			if (!mr->pbl_bt_l1[i]) {
-				hns_roce_loop_free(hr_dev, mr, 1, i, 0);
-				goto err_dma_alloc_l0;
-			}
-
-			*(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
-			for (j = 0; j < pbl_bt_sz / 8; j++) {
-				bt_idx = i * pbl_bt_sz / 8 + j;
-
-				if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
-					size = pbl_bt_sz;
-				} else {
-					npages_allocated = bt_idx *
-							   (pbl_bt_sz / 8);
-					size = (npages - npages_allocated) * 8;
-				}
-				mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
-					      dev, size,
-					      &(mr->pbl_l2_dma_addr[bt_idx]),
-					      GFP_KERNEL);
-				if (!mr->pbl_bt_l2[bt_idx]) {
-					hns_roce_loop_free(hr_dev, mr, 2, i, j);
-					goto err_dma_alloc_l0;
-				}
-
-				*(mr->pbl_bt_l1[i] + j) =
-						mr->pbl_l2_dma_addr[bt_idx];
-
-				pbl_bt_cnt++;
-				if (pbl_bt_cnt >= pbl_last_bt_num) {
-					mr_alloc_done = 1;
-					break;
-				}
-			}
-
-			if (mr_alloc_done)
-				break;
-		}
-	}
-
-	mr->l0_chunk_last_num = i + 1;
-	if (mhop_num == 3)
-		mr->l1_chunk_last_num = j + 1;
-
-	mr->pbl_size = npages;
-	mr->pbl_ba = mr->pbl_l0_dma_addr;
-	mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
-	mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
-	mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
-
-	return 0;
-
-err_dma_alloc_l0:
-	kfree(mr->pbl_bt_l2);
-	mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
-	kfree(mr->pbl_l2_dma_addr);
-	mr->pbl_l2_dma_addr = NULL;
-
-err_kcalloc_l2_dma:
-	kfree(mr->pbl_bt_l1);
-	mr->pbl_bt_l1 = NULL;
-
-err_kcalloc_bt_l1:
-	kfree(mr->pbl_l1_dma_addr);
-	mr->pbl_l1_dma_addr = NULL;
-
-	return -ENOMEM;
-}
-
-static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
-			     u64 size, u32 access, int npages,
-			     struct hns_roce_mr *mr)
-{
-	struct device *dev = hr_dev->dev;
-	unsigned long index = 0;
-	int ret = 0;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	unsigned long obj = 0;
+	int err;
 
 	/* Allocate a key for mr from mr_table */
-	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
-	if (ret == -1)
+	err = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &obj);
+	if (err) {
+		ibdev_err(ibdev,
+			  "failed to alloc bitmap for MR key, ret = %d.\n",
+			  err);
 		return -ENOMEM;
+	}
 
 	mr->iova = iova;			/* MR va starting addr */
 	mr->size = size;			/* MR addr range */
 	mr->pd = pd;				/* MR num */
 	mr->access = access;			/* MR access permit */
 	mr->enabled = 0;			/* MR active status */
-	mr->key = hw_index_to_key(index);	/* MR key */
+	mr->key = hw_index_to_key(obj);		/* MR key */
 
-	if (size == ~0ull) {
-		mr->type = MR_TYPE_DMA;
-		mr->pbl_buf = NULL;
-		mr->pbl_dma_addr = 0;
-		/* PBL multi-hop addressing parameters */
-		mr->pbl_bt_l2 = NULL;
-		mr->pbl_bt_l1 = NULL;
-		mr->pbl_bt_l0 = NULL;
-		mr->pbl_l2_dma_addr = NULL;
-		mr->pbl_l1_dma_addr = NULL;
-		mr->pbl_l0_dma_addr = 0;
-	} else {
-		mr->type = MR_TYPE_MR;
-		if (!hr_dev->caps.pbl_hop_num) {
-			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-							 &(mr->pbl_dma_addr),
-							 GFP_KERNEL);
-			if (!mr->pbl_buf)
-				return -ENOMEM;
-		} else {
-			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
-		}
+	err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+	if (err) {
+		ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
+		goto err_free_bitmap;
 	}
 
-	return ret;
+	return 0;
+err_free_bitmap:
+	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
+	return err;
 }
 
-static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
-			       struct hns_roce_mr *mr)
+static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
 {
-	struct device *dev = hr_dev->dev;
-	int npages_allocated;
-	int npages;
-	int i, j;
-	u32 pbl_bt_sz;
-	u32 mhop_num;
-	u64 bt_idx;
+	unsigned long obj = key_to_hw_index(mr->key);
 
-	npages = ib_umem_page_count(mr->umem);
-	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	mhop_num = hr_dev->caps.pbl_hop_num;
+	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, obj, BITMAP_NO_RR);
+}
 
-	if (mhop_num == HNS_ROCE_HOP_NUM_0)
-		return;
+static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+			size_t length, struct ib_udata *udata, u64 start,
+			int access)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	bool is_fast = mr->type == MR_TYPE_FRMR;
+	struct hns_roce_buf_attr buf_attr = {};
+	int err;
 
-	/* hop_num = 1 */
-	if (mhop_num == 1) {
-		dma_free_coherent(dev, (unsigned int)(npages * 8),
-				  mr->pbl_buf, mr->pbl_dma_addr);
-		return;
-	}
+	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
+	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
+			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
+	buf_attr.region[0].size = length;
+	buf_attr.region[0].hopnum = mr->pbl_hop_num;
+	buf_attr.region_count = 1;
+	buf_attr.fixed_page = true;
+	buf_attr.user_access = access;
+	/* a fast MR's buffer is allocated before mapping, not at creation */
+	buf_attr.mtt_only = is_fast;
 
-	dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
-			  mr->pbl_l0_dma_addr);
+	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
+				  hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  udata, start);
+	if (err)
+		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
+	else
+		mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
 
-	if (mhop_num == 2) {
-		for (i = 0; i < mr->l0_chunk_last_num; i++) {
-			if (i == mr->l0_chunk_last_num - 1) {
-				npages_allocated = i * (pbl_bt_sz / 8);
+	return err;
+}
 
-				dma_free_coherent(dev,
-					      (npages - npages_allocated) * 8,
-					      mr->pbl_bt_l1[i],
-					      mr->pbl_l1_dma_addr[i]);
-
-				break;
-			}
-
-			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
-					  mr->pbl_l1_dma_addr[i]);
-		}
-	} else if (mhop_num == 3) {
-		for (i = 0; i < mr->l0_chunk_last_num; i++) {
-			dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
-					  mr->pbl_l1_dma_addr[i]);
-
-			for (j = 0; j < pbl_bt_sz / 8; j++) {
-				bt_idx = i * (pbl_bt_sz / 8) + j;
-
-				if ((i == mr->l0_chunk_last_num - 1)
-				    && j == mr->l1_chunk_last_num - 1) {
-					npages_allocated = bt_idx *
-							   (pbl_bt_sz / 8);
-
-					dma_free_coherent(dev,
-					      (npages - npages_allocated) * 8,
-					      mr->pbl_bt_l2[bt_idx],
-					      mr->pbl_l2_dma_addr[bt_idx]);
-
-					break;
-				}
-
-				dma_free_coherent(dev, pbl_bt_sz,
-						mr->pbl_bt_l2[bt_idx],
-						mr->pbl_l2_dma_addr[bt_idx]);
-			}
-		}
-	}
-
-	kfree(mr->pbl_bt_l1);
-	kfree(mr->pbl_l1_dma_addr);
-	mr->pbl_bt_l1 = NULL;
-	mr->pbl_l1_dma_addr = NULL;
-	if (mhop_num == 3) {
-		kfree(mr->pbl_bt_l2);
-		kfree(mr->pbl_l2_dma_addr);
-		mr->pbl_bt_l2 = NULL;
-		mr->pbl_l2_dma_addr = NULL;
-	}
+static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
 }
 
 static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
 			     struct hns_roce_mr *mr)
 {
-	struct device *dev = hr_dev->dev;
-	int npages = 0;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
 	int ret;
 
 	if (mr->enabled) {
-		ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
-					 & (hr_dev->caps.num_mtpts - 1));
+		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+					      key_to_hw_index(mr->key) &
+					      (hr_dev->caps.num_mtpts - 1));
 		if (ret)
-			dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+			ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
+				   ret);
 	}
 
-	if (mr->size != ~0ULL) {
-		npages = ib_umem_page_count(mr->umem);
-
-		if (!hr_dev->caps.pbl_hop_num)
-			dma_free_coherent(dev, (unsigned int)(npages * 8),
-					  mr->pbl_buf, mr->pbl_dma_addr);
-		else
-			hns_roce_mhop_free(hr_dev, mr);
-	}
-
-	if (mr->enabled)
-		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
-				   key_to_hw_index(mr->key));
-
-	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
-			     key_to_hw_index(mr->key), BITMAP_NO_RR);
+	free_mr_pbl(hr_dev, mr);
+	free_mr_key(hr_dev, mr);
 }
 
 static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
 			      struct hns_roce_mr *mr)
 {
-	int ret;
 	unsigned long mtpt_idx = key_to_hw_index(mr->key);
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_cmd_mailbox *mailbox;
-	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
-	/* Prepare HEM entry memory */
-	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
-	if (ret)
-		return ret;
+	struct device *dev = hr_dev->dev;
+	int ret;
 
 	/* Allocate mailbox memory */
 	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
 	if (IS_ERR(mailbox)) {
 		ret = PTR_ERR(mailbox);
-		goto err_table;
+		return ret;
 	}
 
-	ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+	if (mr->type != MR_TYPE_FRMR)
+		ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr,
+					     mtpt_idx);
+	else
+		ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
 	if (ret) {
-		dev_err(dev, "Write mtpt fail!\n");
+		dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
 		goto err_page;
 	}
 
-	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
-				 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
 	if (ret) {
-		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+		dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
 		goto err_page;
 	}
 
@@ -694,111 +203,6 @@
 
 err_page:
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
-err_table:
-	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
-	return ret;
-}
-
-static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
-				    struct hns_roce_mtt *mtt, u32 start_index,
-				    u32 npages, u64 *page_list)
-{
-	struct hns_roce_hem_table *table;
-	dma_addr_t dma_handle;
-	__le64 *mtts;
-	u32 bt_page_size;
-	u32 i;
-
-	if (mtt->mtt_type == MTT_TYPE_WQE)
-		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
-	else
-		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
-
-	/* All MTTs must fit in the same page */
-	if (start_index / (bt_page_size / sizeof(u64)) !=
-		(start_index + npages - 1) / (bt_page_size / sizeof(u64)))
-		return -EINVAL;
-
-	if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
-		return -EINVAL;
-
-	if (mtt->mtt_type == MTT_TYPE_WQE)
-		table = &hr_dev->mr_table.mtt_table;
-	else
-		table = &hr_dev->mr_table.mtt_cqe_table;
-
-	mtts = hns_roce_table_find(hr_dev, table,
-				mtt->first_seg +
-				start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
-				&dma_handle);
-	if (!mtts)
-		return -ENOMEM;
-
-	/* Save page addr, low 12 bits : 0 */
-	for (i = 0; i < npages; ++i) {
-		if (!hr_dev->caps.mtt_hop_num)
-			mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
-		else
-			mtts[i] = cpu_to_le64(page_list[i]);
-	}
-
-	return 0;
-}
-
-static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
-			      struct hns_roce_mtt *mtt, u32 start_index,
-			      u32 npages, u64 *page_list)
-{
-	int chunk;
-	int ret;
-	u32 bt_page_size;
-
-	if (mtt->order < 0)
-		return -EINVAL;
-
-	if (mtt->mtt_type == MTT_TYPE_WQE)
-		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
-	else
-		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
-
-	while (npages > 0) {
-		chunk = min_t(int, bt_page_size / sizeof(u64), npages);
-
-		ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
-					       page_list);
-		if (ret)
-			return ret;
-
-		npages -= chunk;
-		start_index += chunk;
-		page_list += chunk;
-	}
-
-	return 0;
-}
-
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
-			   struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
-{
-	u64 *page_list;
-	int ret;
-	u32 i;
-
-	page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
-	if (!page_list)
-		return -ENOMEM;
-
-	for (i = 0; i < buf->npages; ++i) {
-		if (buf->nbufs == 1)
-			page_list[i] = buf->direct.map + (i << buf->page_shift);
-		else
-			page_list[i] = buf->page_list[i].map;
-
-	}
-	ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
-
-	kfree(page_list);
 
 	return ret;
 }
@@ -812,27 +216,6 @@
 				   hr_dev->caps.num_mtpts,
 				   hr_dev->caps.num_mtpts - 1,
 				   hr_dev->caps.reserved_mrws, 0);
-	if (ret)
-		return ret;
-
-	ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
-				  ilog2(hr_dev->caps.num_mtt_segs));
-	if (ret)
-		goto err_buddy;
-
-	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
-		ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
-					  ilog2(hr_dev->caps.num_cqe_segs));
-		if (ret)
-			goto err_buddy_cqe;
-	}
-	return 0;
-
-err_buddy_cqe:
-	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
-
-err_buddy:
-	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 	return ret;
 }
 
@@ -840,24 +223,24 @@
 {
 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 
-	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
-	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
-		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
 	hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
 }
 
 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 {
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
 	struct hns_roce_mr *mr;
 	int ret;
 
-	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (mr == NULL)
 		return  ERR_PTR(-ENOMEM);
 
+	mr->type = MR_TYPE_DMA;
+
 	/* Allocate memory region key */
-	ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
-				~0ULL, acc, 0, mr);
+	hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
+	ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc);
 	if (ret)
 		goto err_free;
 
@@ -866,121 +249,14 @@
 		goto err_mr;
 
 	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
-	mr->umem = NULL;
 
 	return &mr->ibmr;
-
 err_mr:
-	hns_roce_mr_free(to_hr_dev(pd->device), mr);
+	free_mr_key(hr_dev, mr);
 
 err_free:
 	kfree(mr);
 	return ERR_PTR(ret);
-}
-
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
-			       struct hns_roce_mtt *mtt, struct ib_umem *umem)
-{
-	struct device *dev = hr_dev->dev;
-	struct scatterlist *sg;
-	unsigned int order;
-	int i, k, entry;
-	int npage = 0;
-	int ret = 0;
-	int len;
-	u64 page_addr;
-	u64 *pages;
-	u32 bt_page_size;
-	u32 n;
-
-	order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
-		hr_dev->caps.cqe_ba_pg_sz;
-	bt_page_size = 1 << (order + PAGE_SHIFT);
-
-	pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
-	if (!pages)
-		return -ENOMEM;
-
-	i = n = 0;
-
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		for (k = 0; k < len; ++k) {
-			page_addr =
-				sg_dma_address(sg) + (k << umem->page_shift);
-			if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
-				if (page_addr & ((1 << mtt->page_shift) - 1)) {
-					dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n",
-						page_addr, mtt->page_shift);
-					ret = -EINVAL;
-					goto out;
-				}
-				pages[i++] = page_addr;
-			}
-			npage++;
-			if (i == bt_page_size / sizeof(u64)) {
-				ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
-							 pages);
-				if (ret)
-					goto out;
-				n += i;
-				i = 0;
-			}
-		}
-	}
-
-	if (i)
-		ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
-
-out:
-	free_pages((unsigned long) pages, order);
-	return ret;
-}
-
-static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
-				     struct hns_roce_mr *mr,
-				     struct ib_umem *umem)
-{
-	struct scatterlist *sg;
-	int i = 0, j = 0, k;
-	int entry;
-	int len;
-	u64 page_addr;
-	u32 pbl_bt_sz;
-
-	if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
-		return 0;
-
-	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		for (k = 0; k < len; ++k) {
-			page_addr = sg_dma_address(sg) +
-				    (k << umem->page_shift);
-
-			if (!hr_dev->caps.pbl_hop_num) {
-				mr->pbl_buf[i++] = page_addr >> 12;
-			} else if (hr_dev->caps.pbl_hop_num == 1) {
-				mr->pbl_buf[i++] = page_addr;
-			} else {
-				if (hr_dev->caps.pbl_hop_num == 2)
-					mr->pbl_bt_l1[i][j] = page_addr;
-				else if (hr_dev->caps.pbl_hop_num == 3)
-					mr->pbl_bt_l2[i][j] = page_addr;
-
-				j++;
-				if (j >= (pbl_bt_sz / 8)) {
-					i++;
-					j = 0;
-				}
-			}
-		}
-	}
-
-	/* Memory barrier */
-	mb();
-
-	return 0;
 }
 
 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -988,75 +264,67 @@
 				   struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_mr *mr;
-	int bt_size;
 	int ret;
-	int n;
-	int i;
 
-	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(pd->uobject->context, start, length,
-			       access_flags, 0);
-	if (IS_ERR(mr->umem)) {
-		ret = PTR_ERR(mr->umem);
-		goto err_free;
-	}
-
-	n = ib_umem_page_count(mr->umem);
-
-	if (!hr_dev->caps.pbl_hop_num) {
-		if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
-			dev_err(dev,
-			     " MR len %lld err. MR is limited to 4G at most!\n",
-			     length);
-			ret = -EINVAL;
-			goto err_umem;
-		}
-	} else {
-		u64 pbl_size = 1;
-
-		bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
-		for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
-			pbl_size *= bt_size;
-		if (n > pbl_size) {
-			dev_err(dev,
-			    " MR len %lld err. MR page num is limited to %lld!\n",
-			    length, pbl_size);
-			ret = -EINVAL;
-			goto err_umem;
-		}
-	}
-
-	ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
-				access_flags, n, mr);
+	mr->type = MR_TYPE_MR;
+	ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length,
+			   access_flags);
 	if (ret)
-		goto err_umem;
+		goto err_alloc_mr;
 
-	ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+	ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags);
 	if (ret)
-		goto err_mr;
+		goto err_alloc_key;
 
 	ret = hns_roce_mr_enable(hr_dev, mr);
 	if (ret)
-		goto err_mr;
+		goto err_alloc_pbl;
 
 	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
 
 	return &mr->ibmr;
 
-err_mr:
-	hns_roce_mr_free(hr_dev, mr);
-
-err_umem:
-	ib_umem_release(mr->umem);
-
-err_free:
+err_alloc_pbl:
+	free_mr_pbl(hr_dev, mr);
+err_alloc_key:
+	free_mr_key(hr_dev, mr);
+err_alloc_mr:
 	kfree(mr);
 	return ERR_PTR(ret);
+}
+
+static int rereg_mr_trans(struct ib_mr *ibmr, int flags,
+			  u64 start, u64 length,
+			  u64 virt_addr, int mr_access_flags,
+			  struct hns_roce_cmd_mailbox *mailbox,
+			  u32 pdn, struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+	int ret;
+
+	free_mr_pbl(hr_dev, mr);
+	ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags);
+	if (ret) {
+		ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+					   mr_access_flags, virt_addr,
+					   length, mailbox->buf);
+	if (ret) {
+		ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret);
+		free_mr_pbl(hr_dev, mr);
+	}
+
+	return ret;
 }
 
 int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
@@ -1064,12 +332,11 @@
 			   struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+	struct ib_device *ib_dev = &hr_dev->ib_dev;
 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
 	struct hns_roce_cmd_mailbox *mailbox;
-	struct device *dev = hr_dev->dev;
 	unsigned long mtpt_idx;
 	u32 pdn = 0;
-	int npages;
 	int ret;
 
 	if (!mr->enabled)
@@ -1086,9 +353,9 @@
 	if (ret)
 		goto free_cmd_mbox;
 
-	ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
+	ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx);
 	if (ret)
-		dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+		ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
 
 	mr->enabled = 0;
 
@@ -1096,73 +363,24 @@
 		pdn = to_hr_pd(pd)->pdn;
 
 	if (flags & IB_MR_REREG_TRANS) {
-		if (mr->size != ~0ULL) {
-			npages = ib_umem_page_count(mr->umem);
-
-			if (hr_dev->caps.pbl_hop_num)
-				hns_roce_mhop_free(hr_dev, mr);
-			else
-				dma_free_coherent(dev, npages * 8, mr->pbl_buf,
-						  mr->pbl_dma_addr);
-		}
-		ib_umem_release(mr->umem);
-
-		mr->umem = ib_umem_get(ibmr->uobject->context, start, length,
-				       mr_access_flags, 0);
-		if (IS_ERR(mr->umem)) {
-			ret = PTR_ERR(mr->umem);
-			mr->umem = NULL;
+		ret = rereg_mr_trans(ibmr, flags,
+				     start, length,
+				     virt_addr, mr_access_flags,
+				     mailbox, pdn, udata);
+		if (ret)
 			goto free_cmd_mbox;
-		}
-		npages = ib_umem_page_count(mr->umem);
-
-		if (hr_dev->caps.pbl_hop_num) {
-			ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
-			if (ret)
-				goto release_umem;
-		} else {
-			mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
-							 &(mr->pbl_dma_addr),
-							 GFP_KERNEL);
-			if (!mr->pbl_buf) {
-				ret = -ENOMEM;
-				goto release_umem;
-			}
-		}
-	}
-
-	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
-					   mr_access_flags, virt_addr,
-					   length, mailbox->buf);
-	if (ret) {
-		if (flags & IB_MR_REREG_TRANS)
-			goto release_umem;
-		else
+	} else {
+		ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
+						   mr_access_flags, virt_addr,
+						   length, mailbox->buf);
+		if (ret)
 			goto free_cmd_mbox;
 	}
 
-	if (flags & IB_MR_REREG_TRANS) {
-		ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
-		if (ret) {
-			if (mr->size != ~0ULL) {
-				npages = ib_umem_page_count(mr->umem);
-
-				if (hr_dev->caps.pbl_hop_num)
-					hns_roce_mhop_free(hr_dev, mr);
-				else
-					dma_free_coherent(dev, npages * 8,
-							  mr->pbl_buf,
-							  mr->pbl_dma_addr);
-			}
-
-			goto release_umem;
-		}
-	}
-
-	ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
+	ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx);
 	if (ret) {
-		dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
-		goto release_umem;
+		ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
+		goto free_cmd_mbox;
 	}
 
 	mr->enabled = 1;
@@ -1173,31 +391,725 @@
 
 	return 0;
 
-release_umem:
-	ib_umem_release(mr->umem);
-
 free_cmd_mbox:
 	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 
 	return ret;
 }
 
-int hns_roce_dereg_mr(struct ib_mr *ibmr)
+int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
 	struct hns_roce_mr *mr = to_hr_mr(ibmr);
 	int ret = 0;
 
 	if (hr_dev->hw->dereg_mr) {
-		ret = hr_dev->hw->dereg_mr(hr_dev, mr);
+		ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
 	} else {
 		hns_roce_mr_free(hr_dev, mr);
-
-		if (mr->umem)
-			ib_umem_release(mr->umem);
-
 		kfree(mr);
 	}
 
 	return ret;
 }
+
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+				u32 max_num_sg)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_mr *mr;
+	u64 length;
+	int ret;
+
+	if (mr_type != IB_MR_TYPE_MEM_REG)
+		return ERR_PTR(-EINVAL);
+
+	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+		dev_err(dev, "max_num_sg larger than %d\n",
+			HNS_ROCE_FRMR_MAX_PA);
+		return ERR_PTR(-EINVAL);
+	}
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->type = MR_TYPE_FRMR;
+
+	/* Allocate memory region key */
+	length = max_num_sg * (1 << PAGE_SHIFT);
+	ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0);
+	if (ret)
+		goto err_free;
+
+	ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0);
+	if (ret)
+		goto err_key;
+
+	ret = hns_roce_mr_enable(hr_dev, mr);
+	if (ret)
+		goto err_pbl;
+
+	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+	mr->ibmr.length = length;
+
+	return &mr->ibmr;
+
+err_pbl:
+	free_mr_pbl(hr_dev, mr);
+err_key:
+	free_mr_key(hr_dev, mr);
+err_free:
+	kfree(mr);
+	return ERR_PTR(ret);
+}
+
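+/* callback for ib_sg_to_pages(); records one page address into the MR */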
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
+{
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+
+	if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
+		mr->page_list[mr->npages++] = addr;
+		return 0;
+	}
+
+	return -ENOBUFS;
+}
+
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+		       unsigned int *sg_offset)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_mr *mr = to_hr_mr(ibmr);
+	struct hns_roce_mtr *mtr = &mr->pbl_mtr;
+	int ret = 0;
+
+	mr->npages = 0;
+	mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
+				 sizeof(dma_addr_t), GFP_KERNEL);
+	if (!mr->page_list)
+		return ret;
+
+	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+	if (ret < 1) {
+		ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
+			  mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
+		goto err_page_list;
+	}
+
+	mtr->hem_cfg.region[0].offset = 0;
+	mtr->hem_cfg.region[0].count = mr->npages;
+	mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
+	mtr->hem_cfg.region_count = 1;
+	ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
+	if (ret) {
+		ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
+		ret = 0;
+	} else {
+		mr->pbl_mtr.hem_cfg.buf_pg_shift = ilog2(ibmr->page_size);
+		ret = mr->npages;
+	}
+
+err_page_list:
+	kvfree(mr->page_list);
+	mr->page_list = NULL;
+
+	return ret;
+}
+
+static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_mw *mw)
+{
+	struct device *dev = hr_dev->dev;
+	int ret;
+
+	if (mw->enabled) {
+		ret = hns_roce_hw_destroy_mpt(hr_dev, NULL,
+					      key_to_hw_index(mw->rkey) &
+					      (hr_dev->caps.num_mtpts - 1));
+		if (ret)
+			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);
+
+		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
+				   key_to_hw_index(mw->rkey));
+	}
+
+	hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+			     key_to_hw_index(mw->rkey), BITMAP_NO_RR);
+}
+
+static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
+			      struct hns_roce_mw *mw)
+{
+	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+	struct hns_roce_cmd_mailbox *mailbox;
+	struct device *dev = hr_dev->dev;
+	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
+	int ret;
+
+	/* prepare HEM entry memory */
+	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+	if (ret)
+		return ret;
+
+	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+	if (IS_ERR(mailbox)) {
+		ret = PTR_ERR(mailbox);
+		goto err_table;
+	}
+
+	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
+	if (ret) {
+		dev_err(dev, "MW write mtpt fail!\n");
+		goto err_page;
+	}
+
+	ret = hns_roce_hw_create_mpt(hr_dev, mailbox,
+				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+	if (ret) {
+		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
+		goto err_page;
+	}
+
+	mw->enabled = 1;
+
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+	return 0;
+
+err_page:
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+err_table:
+	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+
+	return ret;
+}
+
+int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+	struct hns_roce_mw *mw = to_hr_mw(ibmw);
+	unsigned long index = 0;
+	int ret;
+
+	/* Allocate a key for mw from bitmap */
+	ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
+	if (ret)
+		return ret;
+
+	mw->rkey = hw_index_to_key(index);
+
+	ibmw->rkey = mw->rkey;
+	mw->pdn = to_hr_pd(ibmw->pd)->pdn;
+	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
+	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
+	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+
+	ret = hns_roce_mw_enable(hr_dev, mw);
+	if (ret)
+		goto err_mw;
+
+	return 0;
+
+err_mw:
+	hns_roce_mw_free(hr_dev, mw);
+	return ret;
+}
+
+int hns_roce_dealloc_mw(struct ib_mw *ibmw)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
+	struct hns_roce_mw *mw = to_hr_mw(ibmw);
+
+	hns_roce_mw_free(hr_dev, mw);
+	return 0;
+}
+
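+/* write the page addresses of one buffer region into its mtt entries */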
+static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+			  dma_addr_t *pages, struct hns_roce_buf_region *region)
+{
+	__le64 *mtts;
+	int offset;
+	int count;
+	int npage;
+	u64 addr;
+	int end;
+	int i;
+
+	/* if hopnum is 0, the buffer has no BAs to store, so skip the mtt */
+	if (!region->hopnum)
+		return 0;
+
+	offset = region->offset;
+	end = offset + region->count;
+	npage = 0;
+	while (offset < end) {
+		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+						  offset, &count, NULL);
+		if (!mtts)
+			return -ENOBUFS;
+
+		for (i = 0; i < count; i++) {
+			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+				addr = to_hr_hw_page_addr(pages[npage]);
+			else
+				addr = pages[npage];
+
+			mtts[i] = cpu_to_le64(addr);
+			npage++;
+		}
+		offset += count;
+	}
+
+	return 0;
+}
+
+static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
+{
+	int i;
+
+	for (i = 0; i < attr->region_count; i++)
+		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
+		    attr->region[i].hopnum > 0)
+			return true;
+
+	/* Because the mtr has only one root base address, a hopnum of 0
+	 * means the root base address equals the first buffer address, so
+	 * all allocated memory must lie in one contiguous range accessed
+	 * in direct mode.
+	 */
+	return false;
+}
+
+static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
+{
+	size_t size = 0;
+	int i;
+
+	for (i = 0; i < attr->region_count; i++)
+		size += attr->region[i].size;
+
+	return size;
+}
+
+static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size,
+					  unsigned int page_shift)
+{
+	if (is_direct)
+		return ALIGN(alloc_size, 1 << page_shift);
+	else
+		return HNS_HW_DIRECT_PAGE_COUNT << page_shift;
+}
+
+/*
+ * Check that the given pages lie in a contiguous address space.
+ * Returns 0 on success, or the index of the first non-contiguous page.
+ */
+static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
+					 unsigned int page_shift)
+{
+	size_t page_size = 1 << page_shift;
+	int i;
+
+	for (i = 1; i < page_count; i++)
+		if (pages[i] - pages[i - 1] != page_size)
+			return i;
+
+	return 0;
+}
+
+static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+	/* release user buffers */
+	if (mtr->umem) {
+		ib_umem_release(mtr->umem);
+		mtr->umem = NULL;
+	}
+
+	/* release kernel buffers */
+	if (mtr->kmem) {
+		hns_roce_buf_free(hr_dev, mtr->kmem);
+		kfree(mtr->kmem);
+		mtr->kmem = NULL;
+	}
+}
+
+static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+			  struct hns_roce_buf_attr *buf_attr, bool is_direct,
+			  struct ib_udata *udata, unsigned long user_addr)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	unsigned int best_pg_shift;
+	int all_pg_count = 0;
+	size_t direct_size;
+	size_t total_size;
+	int ret;
+
+	total_size = mtr_bufs_size(buf_attr);
+	if (total_size < 1) {
+		ibdev_err(ibdev, "Failed to check mtr size\n");
+		return -EINVAL;
+	}
+
+	if (udata) {
+		unsigned long pgsz_bitmap;
+		unsigned long page_size;
+
+		mtr->kmem = NULL;
+		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
+					buf_attr->user_access);
+		if (IS_ERR_OR_NULL(mtr->umem)) {
+			ibdev_err(ibdev, "Failed to get umem, ret %ld\n",
+				  PTR_ERR(mtr->umem));
+			return -ENOMEM;
+		}
+		if (buf_attr->fixed_page)
+			pgsz_bitmap = 1 << buf_attr->page_shift;
+		else
+			pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT);
+
+		page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap,
+						   user_addr);
+		if (!page_size)
+			return -EINVAL;
+		best_pg_shift = order_base_2(page_size);
+		all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size);
+		ret = 0;
+	} else {
+		mtr->umem = NULL;
+		mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL);
+		if (!mtr->kmem) {
+			ibdev_err(ibdev, "Failed to alloc kmem\n");
+			return -ENOMEM;
+		}
+		direct_size = mtr_kmem_direct_size(is_direct, total_size,
+						   buf_attr->page_shift);
+		ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size,
+					 mtr->kmem, buf_attr->page_shift);
+		if (ret) {
+			ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret);
+			goto err_alloc_mem;
+		}
+		best_pg_shift = buf_attr->page_shift;
+		all_pg_count = mtr->kmem->npages;
+	}
+
+	/* must be bigger than the minimum hardware page shift */
+	if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) {
+		ret = -EINVAL;
+		ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n",
+			  best_pg_shift, all_pg_count);
+		goto err_alloc_mem;
+	}
+
+	mtr->hem_cfg.buf_pg_shift = best_pg_shift;
+	mtr->hem_cfg.buf_pg_count = all_pg_count;
+
+	return 0;
+err_alloc_mem:
+	mtr_free_bufs(hr_dev, mtr);
+	return ret;
+}
+
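+/*
+ * Collect the dma addresses of the buffer pages from umem or kmem and,
+ * in direct mode, check that they are contiguous.
+ */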
+static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+			 dma_addr_t *pages, int count, unsigned int page_shift)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	int npage;
+	int err;
+
+	if (mtr->umem)
+		npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0,
+					       mtr->umem, page_shift);
+	else
+		npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0,
+					       mtr->kmem);
+
+	if (mtr->hem_cfg.is_direct && npage > 1) {
+		err = mtr_check_direct_pages(pages, npage, page_shift);
+		if (err) {
+			ibdev_err(ibdev, "Failed to check %s direct page-%d\n",
+				  mtr->umem ? "user" : "kernel", err);
+			npage = err;
+		}
+	}
+
+	return npage;
+}
+
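+/*
+ * Write the page addresses of all regions into the mtr's BA tables; in
+ * direct mode only the first page address is kept as the root ba.
+ */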
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+		     dma_addr_t *pages, int page_cnt)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_buf_region *r;
+	int err;
+	int i;
+
+	/*
+	 * Only use the first page address as the root ba when hopnum is 0,
+	 * because the addresses of all pages are consecutive in this case.
+	 */
+	if (mtr->hem_cfg.is_direct) {
+		mtr->hem_cfg.root_ba = pages[0];
+		return 0;
+	}
+
+	for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+		r = &mtr->hem_cfg.region[i];
+		if (r->offset + r->count > page_cnt) {
+			err = -EINVAL;
+			ibdev_err(ibdev,
+				  "failed to check mtr%u end %u + %u, max %u.\n",
+				  i, r->offset, r->count, page_cnt);
+			return err;
+		}
+
+		err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r);
+		if (err) {
+			ibdev_err(ibdev,
+				  "failed to map mtr%u offset %u, ret = %d.\n",
+				  i, r->offset, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
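+/*
+ * Copy up to @mtt_max mtt entries starting at buffer offset @offset into
+ * @mtt_buf, returning the number of entries copied and, through
+ * @base_addr, the root base address of the mtr.
+ */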
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+		      int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+{
+	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+	int start_index;
+	int mtt_count;
+	int total = 0;
+	__le64 *mtts;
+	int npage;
+	u64 addr;
+	int left;
+
+	if (!mtt_buf || mtt_max < 1)
+		goto done;
+
+	/* no mtt memory in direct mode, so just return the buffer address */
+	if (cfg->is_direct) {
+		start_index = offset >> HNS_HW_PAGE_SHIFT;
+		for (mtt_count = 0; mtt_count < cfg->region_count &&
+		     total < mtt_max; mtt_count++) {
+			npage = cfg->region[mtt_count].offset;
+			if (npage < start_index)
+				continue;
+
+			addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+			if (hr_dev->hw_rev == HNS_ROCE_HW_VER1)
+				mtt_buf[total] = to_hr_hw_page_addr(addr);
+			else
+				mtt_buf[total] = addr;
+
+			total++;
+		}
+
+		goto done;
+	}
+
+	start_index = offset >> cfg->buf_pg_shift;
+	left = mtt_max;
+	while (left > 0) {
+		mtt_count = 0;
+		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+						  start_index + total,
+						  &mtt_count, NULL);
+		if (!mtts || !mtt_count)
+			goto done;
+
+		npage = min(mtt_count, left);
+		left -= npage;
+		for (mtt_count = 0; mtt_count < npage; mtt_count++)
+			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
+	}
+
+done:
+	if (base_addr)
+		*base_addr = cfg->root_ba;
+
+	return total;
+}
+
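+/*
+ * Convert the byte-sized regions in @attr into page-indexed regions in
+ * @cfg; returns the total page count, or a negative errno.
+ */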
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_buf_attr *attr,
+			    struct hns_roce_hem_cfg *cfg,
+			    unsigned int *buf_page_shift)
+{
+	struct hns_roce_buf_region *r;
+	unsigned int page_shift;
+	int page_cnt = 0;
+	size_t buf_size;
+	int region_cnt;
+
+	if (cfg->is_direct) {
+		buf_size = cfg->buf_pg_count << cfg->buf_pg_shift;
+		page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE);
+		/*
+		 * When the HEM buffer uses level-0 addressing, the page size
+		 * equals the buffer size, and the page size = 4K * 2^N.
+		 */
+		cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt);
+		if (attr->region_count > 1) {
+			cfg->buf_pg_count = page_cnt;
+			page_shift = HNS_HW_PAGE_SHIFT;
+		} else {
+			cfg->buf_pg_count = 1;
+			page_shift = cfg->buf_pg_shift;
+			if (buf_size != 1 << page_shift) {
+				ibdev_err(&hr_dev->ib_dev,
+					  "failed to check direct size %zu shift %d.\n",
+					  buf_size, page_shift);
+				return -EINVAL;
+			}
+		}
+	} else {
+		page_shift = cfg->buf_pg_shift;
+	}
+
+	/* convert buffer size to page index and page count */
+	for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count &&
+	     region_cnt < attr->region_count &&
+	     region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
+		r = &cfg->region[region_cnt];
+		r->offset = page_cnt;
+		buf_size = hr_hw_page_align(attr->region[region_cnt].size);
+		r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
+		page_cnt += r->count;
+		r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
+					     r->count);
+	}
+
+	if (region_cnt < 1) {
+		ibdev_err(&hr_dev->ib_dev,
+			  "failed to check mtr region count, pages = %d.\n",
+			  cfg->buf_pg_count);
+		return -ENOBUFS;
+	}
+
+	cfg->region_count = region_cnt;
+	*buf_page_shift = page_shift;
+
+	return page_cnt;
+}
+
+/**
+ * hns_roce_mtr_create - Create hns memory translate region.
+ *
+ * @hr_dev: RoCE device struct pointer
+ * @mtr: memory translate region
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
+ * @udata: user space context; NULL means kernel space
+ * @user_addr: userspace virtual address to start at
+ */
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+			struct hns_roce_buf_attr *buf_attr,
+			unsigned int ba_page_shift, struct ib_udata *udata,
+			unsigned long user_addr)
+{
+	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	unsigned int buf_page_shift = 0;
+	dma_addr_t *pages = NULL;
+	int all_pg_cnt;
+	int get_pg_cnt;
+	int ret = 0;
+
+	/* if mtt is disabled, all pages must lie in one contiguous range */
+	cfg->is_direct = !mtr_has_mtt(buf_attr);
+
+	/* if the buffer only needs an mtt, just init the hem cfg */
+	if (buf_attr->mtt_only) {
+		cfg->buf_pg_shift = buf_attr->page_shift;
+		cfg->buf_pg_count = mtr_bufs_size(buf_attr) >>
+				    buf_attr->page_shift;
+		mtr->umem = NULL;
+		mtr->kmem = NULL;
+	} else {
+		ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct,
+				     udata, user_addr);
+		if (ret) {
+			ibdev_err(ibdev,
+				  "failed to alloc mtr bufs, ret = %d.\n", ret);
+			return ret;
+		}
+	}
+
+	all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift);
+	if (all_pg_cnt < 1) {
+		ret = -ENOBUFS;
+		ibdev_err(ibdev, "failed to init mtr buf cfg.\n");
+		goto err_alloc_bufs;
+	}
+
+	hns_roce_hem_list_init(&mtr->hem_list);
+	if (!cfg->is_direct) {
+		ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+						cfg->region, cfg->region_count,
+						ba_page_shift);
+		if (ret) {
+			ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n",
+				  ret);
+			goto err_alloc_bufs;
+		}
+		cfg->root_ba = mtr->hem_list.root_ba;
+		cfg->ba_pg_shift = ba_page_shift;
+	} else {
+		cfg->ba_pg_shift = cfg->buf_pg_shift;
+	}
+
+	/* no buffer to map */
+	if (buf_attr->mtt_only)
+		return 0;
+
+	/* alloc a tmp array to store the buffer's dma addresses */
+	pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		ibdev_err(ibdev, "failed to alloc mtr page list %d.\n",
+			  all_pg_cnt);
+		goto err_alloc_hem_list;
+	}
+
+	get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt,
+				   buf_page_shift);
+	if (get_pg_cnt != all_pg_cnt) {
+		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n",
+			  get_pg_cnt, all_pg_cnt);
+		ret = -ENOBUFS;
+		goto err_alloc_page_list;
+	}
+
+	/* write buffer's dma address to BA table */
+	ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt);
+	if (ret) {
+		ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
+		goto err_alloc_page_list;
+	}
+
+	/* drop tmp array */
+	kvfree(pages);
+	return 0;
+err_alloc_page_list:
+	kvfree(pages);
+err_alloc_hem_list:
+	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+err_alloc_bufs:
+	mtr_free_bufs(hr_dev, mtr);
+	return ret;
+}
+
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+	/* release the multi-hop addressing resources */
+	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+
+	/* free buffers */
+	mtr_free_bufs(hr_dev, mtr);
+}

--
Gitblit v1.6.2