Blob Blame History Raw
From: Changbin Du <changbin.du@intel.com>
Date: Tue, 30 Jan 2018 19:19:40 +0800
Subject: drm/i915/gvt: Rework shadow graphic memory management code
Git-commit: ede9d0cfcb789b6fd86ecb71b4721a19c53956e6
Patch-mainline: v4.17-rc1
References: FATE#326289 FATE#326079 FATE#326049 FATE#322398 FATE#326166

This is a big one and the GVT shadow graphic memory management code is
heavily refined. The new code is more straightforward with less code.

The struct intel_vgpu_mm is restructured to be clearly defined, to use
accurate names, and to drop some of the original fields that were
really redundant.

Now we only manage ppgtt mm object with mm->ppgtt_mm.lru_list. No need
to mix ppgtt and ggtt together, since one vGPU only has one ggtt object.

v4: Don't invoke ppgtt_free_all_shadow_page before intel_vgpu_destroy_all_ppgtt_mm.
v3: Add GVT_RING_CTX_NR_PDPS to avoid confusion about the PDPs.
v2: Split some changes into small standalone patches.

Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Acked-by: Petr Tesarik <ptesarik@suse.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c       |  520 +++++++++++++++--------------------
 drivers/gpu/drm/i915/gvt/gtt.h       |   82 ++---
 drivers/gpu/drm/i915/gvt/handlers.c  |   15 -
 drivers/gpu/drm/i915/gvt/mmio.c      |    5 
 drivers/gpu/drm/i915/gvt/scheduler.c |   27 +
 drivers/gpu/drm/i915/gvt/trace.h     |    8 
 6 files changed, 300 insertions(+), 357 deletions(-)

--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -481,7 +481,10 @@ int intel_vgpu_mm_get_entry(struct intel
 	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
 	int ret;
 
-	e->type = mm->page_table_entry_type;
+	if (mm->type == INTEL_GVT_MM_PPGTT)
+		e->type = mm->ppgtt_mm.root_entry_type;
+	else
+		e->type = GTT_TYPE_GGTT_PTE;
 
 	ret = ops->get_entry(page_table, e, index, false, 0, mm->vgpu);
 	if (ret)
@@ -782,7 +785,7 @@ static int ppgtt_write_protection_handle
 	return ret;
 }
 
-static int reclaim_one_mm(struct intel_gvt *gvt);
+static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_shadow_page(
 		struct intel_vgpu *vgpu, int type, unsigned long gfn)
@@ -793,7 +796,7 @@ static struct intel_vgpu_ppgtt_spt *ppgt
 retry:
 	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
 	if (!spt) {
-		if (reclaim_one_mm(vgpu->gvt))
+		if (reclaim_one_ppgtt_mm(vgpu->gvt))
 			goto retry;
 
 		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
@@ -1445,111 +1448,37 @@ static int ppgtt_handle_guest_write_page
 	return 0;
 }
 
-/*
- * mm page table allocation policy for bdw+
- *  - for ggtt, only virtual page table will be allocated.
- *  - for ppgtt, dedicated virtual/shadow page table will be allocated.
- */
-static int gen8_mm_alloc_page_table(struct intel_vgpu_mm *mm)
-{
-	struct intel_vgpu *vgpu = mm->vgpu;
-	struct intel_gvt *gvt = vgpu->gvt;
-	const struct intel_gvt_device_info *info = &gvt->device_info;
-	void *mem;
-
-	if (mm->type == INTEL_GVT_MM_PPGTT) {
-		mm->page_table_entry_cnt = 4;
-		mm->page_table_entry_size = mm->page_table_entry_cnt *
-			info->gtt_entry_size;
-		mem = kzalloc(mm->has_shadow_page_table ?
-			mm->page_table_entry_size * 2
-				: mm->page_table_entry_size, GFP_KERNEL);
-		if (!mem)
-			return -ENOMEM;
-		mm->virtual_page_table = mem;
-		if (!mm->has_shadow_page_table)
-			return 0;
-		mm->shadow_page_table = mem + mm->page_table_entry_size;
-	} else if (mm->type == INTEL_GVT_MM_GGTT) {
-		mm->page_table_entry_cnt =
-			(gvt_ggtt_gm_sz(gvt) >> I915_GTT_PAGE_SHIFT);
-		mm->page_table_entry_size = mm->page_table_entry_cnt *
-			info->gtt_entry_size;
-		mem = vzalloc(mm->page_table_entry_size);
-		if (!mem)
-			return -ENOMEM;
-		mm->virtual_page_table = mem;
-	}
-	return 0;
-}
-
-static void gen8_mm_free_page_table(struct intel_vgpu_mm *mm)
-{
-	if (mm->type == INTEL_GVT_MM_PPGTT) {
-		kfree(mm->virtual_page_table);
-	} else if (mm->type == INTEL_GVT_MM_GGTT) {
-		if (mm->virtual_page_table)
-			vfree(mm->virtual_page_table);
-	}
-	mm->virtual_page_table = mm->shadow_page_table = NULL;
-}
-
-static void invalidate_mm(struct intel_vgpu_mm *mm)
+static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
 {
 	struct intel_vgpu *vgpu = mm->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_gvt_gtt *gtt = &gvt->gtt;
 	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
 	struct intel_gvt_gtt_entry se;
-	int i;
+	int index;
 
-	if (WARN_ON(!mm->has_shadow_page_table || !mm->shadowed))
+	if (!mm->ppgtt_mm.shadowed)
 		return;
 
-	for (i = 0; i < mm->page_table_entry_cnt; i++) {
-		ppgtt_get_shadow_root_entry(mm, &se, i);
+	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
+		ppgtt_get_shadow_root_entry(mm, &se, index);
+
 		if (!ops->test_present(&se))
 			continue;
-		ppgtt_invalidate_shadow_page_by_shadow_entry(
-				vgpu, &se);
+
+		ppgtt_invalidate_shadow_page_by_shadow_entry(vgpu, &se);
 		se.val64 = 0;
-		ppgtt_set_shadow_root_entry(mm, &se, i);
+		ppgtt_set_shadow_root_entry(mm, &se, index);
 
 		trace_gpt_change(vgpu->id, "destroy root pointer",
-				NULL, se.type, se.val64, i);
+				 NULL, se.type, se.val64, index);
 	}
-	mm->shadowed = false;
-}
 
-/**
- * intel_vgpu_destroy_mm - destroy a mm object
- * @mm: a kref object
- *
- * This function is used to destroy a mm object for vGPU
- *
- */
-void intel_vgpu_destroy_mm(struct kref *mm_ref)
-{
-	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
-	struct intel_vgpu *vgpu = mm->vgpu;
-	struct intel_gvt *gvt = vgpu->gvt;
-	struct intel_gvt_gtt *gtt = &gvt->gtt;
-
-	if (!mm->initialized)
-		goto out;
-
-	list_del(&mm->list);
-	list_del(&mm->lru_list);
-
-	if (mm->has_shadow_page_table)
-		invalidate_mm(mm);
-
-	gtt->mm_free_page_table(mm);
-out:
-	kfree(mm);
+	mm->ppgtt_mm.shadowed = false;
 }
 
-static int shadow_mm(struct intel_vgpu_mm *mm)
+
+static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
 {
 	struct intel_vgpu *vgpu = mm->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
@@ -1557,21 +1486,21 @@ static int shadow_mm(struct intel_vgpu_m
 	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
 	struct intel_vgpu_ppgtt_spt *spt;
 	struct intel_gvt_gtt_entry ge, se;
-	int i;
-	int ret;
+	int index, ret;
 
-	if (WARN_ON(!mm->has_shadow_page_table || mm->shadowed))
+	if (mm->ppgtt_mm.shadowed)
 		return 0;
 
-	mm->shadowed = true;
+	mm->ppgtt_mm.shadowed = true;
+
+	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
+		ppgtt_get_guest_root_entry(mm, &ge, index);
 
-	for (i = 0; i < mm->page_table_entry_cnt; i++) {
-		ppgtt_get_guest_root_entry(mm, &ge, i);
 		if (!ops->test_present(&ge))
 			continue;
 
 		trace_gpt_change(vgpu->id, __func__, NULL,
-				ge.type, ge.val64, i);
+				ge.type, ge.val64, index);
 
 		spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge);
 		if (IS_ERR(spt)) {
@@ -1580,96 +1509,132 @@ static int shadow_mm(struct intel_vgpu_m
 			goto fail;
 		}
 		ppgtt_generate_shadow_entry(&se, spt, &ge);
-		ppgtt_set_shadow_root_entry(mm, &se, i);
+		ppgtt_set_shadow_root_entry(mm, &se, index);
 
 		trace_gpt_change(vgpu->id, "populate root pointer",
-				NULL, se.type, se.val64, i);
+				NULL, se.type, se.val64, index);
 	}
+
 	return 0;
 fail:
-	invalidate_mm(mm);
+	invalidate_ppgtt_mm(mm);
 	return ret;
 }
 
+static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_mm *mm;
+
+	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
+	if (!mm)
+		return NULL;
+
+	mm->vgpu = vgpu;
+	kref_init(&mm->ref);
+	atomic_set(&mm->pincount, 0);
+
+	return mm;
+}
+
+static void vgpu_free_mm(struct intel_vgpu_mm *mm)
+{
+	kfree(mm);
+}
+
 /**
- * intel_vgpu_create_mm - create a mm object for a vGPU
+ * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
  * @vgpu: a vGPU
- * @mm_type: mm object type, should be PPGTT or GGTT
- * @virtual_page_table: page table root pointers. Could be NULL if user wants
- *	to populate shadow later.
- * @page_table_level: describe the page table level of the mm object
- * @pde_base_index: pde root pointer base in GGTT MMIO.
+ * @root_entry_type: ppgtt root entry type
+ * @pdps: guest pdps.
  *
- * This function is used to create a mm object for a vGPU.
+ * This function is used to create a ppgtt mm object for a vGPU.
  *
  * Returns:
  * Zero on success, negative error code in pointer if failed.
  */
-struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
-		int mm_type, void *virtual_page_table, int page_table_level,
-		u32 pde_base_index)
+struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
+		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
 {
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct intel_gvt_gtt *gtt = &gvt->gtt;
 	struct intel_vgpu_mm *mm;
 	int ret;
 
-	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
-	if (!mm) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	mm = vgpu_alloc_mm(vgpu);
+	if (!mm)
+		return ERR_PTR(-ENOMEM);
 
-	mm->type = mm_type;
+	mm->type = INTEL_GVT_MM_PPGTT;
 
-	if (page_table_level == 1)
-		mm->page_table_entry_type = GTT_TYPE_GGTT_PTE;
-	else if (page_table_level == 3)
-		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
-	else if (page_table_level == 4)
-		mm->page_table_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
-	else {
-		WARN_ON(1);
-		ret = -EINVAL;
-		goto fail;
-	}
+	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
+		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
+	mm->ppgtt_mm.root_entry_type = root_entry_type;
 
-	mm->page_table_level = page_table_level;
-	mm->pde_base_index = pde_base_index;
+	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
+	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
 
-	mm->vgpu = vgpu;
-	mm->has_shadow_page_table = !!(mm_type == INTEL_GVT_MM_PPGTT);
-
-	kref_init(&mm->ref);
-	atomic_set(&mm->pincount, 0);
-	INIT_LIST_HEAD(&mm->list);
-	INIT_LIST_HEAD(&mm->lru_list);
-	list_add_tail(&mm->list, &vgpu->gtt.mm_list_head);
+	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
+		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
+	else
+		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
+		       sizeof(mm->ppgtt_mm.guest_pdps));
 
-	ret = gtt->mm_alloc_page_table(mm);
+	ret = shadow_ppgtt_mm(mm);
 	if (ret) {
-		gvt_vgpu_err("fail to allocate page table for mm\n");
-		goto fail;
+		gvt_vgpu_err("failed to shadow ppgtt mm\n");
+		vgpu_free_mm(mm);
+		return ERR_PTR(ret);
 	}
 
-	mm->initialized = true;
+	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
+	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
+	return mm;
+}
 
-	if (virtual_page_table)
-		memcpy(mm->virtual_page_table, virtual_page_table,
-				mm->page_table_entry_size);
+static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_mm *mm;
+	unsigned long nr_entries;
 
-	if (mm->has_shadow_page_table) {
-		ret = shadow_mm(mm);
-		if (ret)
-			goto fail;
-		list_add_tail(&mm->lru_list, &gvt->gtt.mm_lru_list_head);
+	mm = vgpu_alloc_mm(vgpu);
+	if (!mm)
+		return ERR_PTR(-ENOMEM);
+
+	mm->type = INTEL_GVT_MM_GGTT;
+
+	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
+	mm->ggtt_mm.virtual_ggtt = vzalloc(nr_entries *
+					vgpu->gvt->device_info.gtt_entry_size);
+	if (!mm->ggtt_mm.virtual_ggtt) {
+		vgpu_free_mm(mm);
+		return ERR_PTR(-ENOMEM);
 	}
+
 	return mm;
-fail:
-	gvt_vgpu_err("fail to create mm\n");
-	if (mm)
-		intel_gvt_mm_unreference(mm);
-	return ERR_PTR(ret);
+}
+
+/**
+ * intel_vgpu_destroy_mm - destroy a mm object
+ * @mm_ref: a kref object
+ *
+ * This function is used to destroy a mm object for vGPU
+ *
+ */
+void intel_vgpu_destroy_mm(struct kref *mm_ref)
+{
+	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
+
+	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
+		gvt_err("vgpu mm pin count bug detected\n");
+
+	if (mm->type == INTEL_GVT_MM_PPGTT) {
+		list_del(&mm->ppgtt_mm.list);
+		list_del(&mm->ppgtt_mm.lru_list);
+		invalidate_ppgtt_mm(mm);
+	} else {
+		vfree(mm->ggtt_mm.virtual_ggtt);
+	}
+
+	vgpu_free_mm(mm);
 }
 
 /**
@@ -1680,9 +1645,6 @@ fail:
  */
 void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
 {
-	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
-		return;
-
 	atomic_dec(&mm->pincount);
 }
 
@@ -1701,36 +1663,34 @@ int intel_vgpu_pin_mm(struct intel_vgpu_
 {
 	int ret;
 
-	if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT))
-		return 0;
+	atomic_inc(&mm->pincount);
 
-	if (!mm->shadowed) {
-		ret = shadow_mm(mm);
+	if (mm->type == INTEL_GVT_MM_PPGTT) {
+		ret = shadow_ppgtt_mm(mm);
 		if (ret)
 			return ret;
+
+		list_move_tail(&mm->ppgtt_mm.lru_list,
+			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
+
 	}
 
-	atomic_inc(&mm->pincount);
-	list_del_init(&mm->lru_list);
-	list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head);
 	return 0;
 }
 
-static int reclaim_one_mm(struct intel_gvt *gvt)
+static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
 {
 	struct intel_vgpu_mm *mm;
 	struct list_head *pos, *n;
 
-	list_for_each_safe(pos, n, &gvt->gtt.mm_lru_list_head) {
-		mm = container_of(pos, struct intel_vgpu_mm, lru_list);
+	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
 
-		if (mm->type != INTEL_GVT_MM_PPGTT)
-			continue;
 		if (atomic_read(&mm->pincount))
 			continue;
 
-		list_del_init(&mm->lru_list);
-		invalidate_mm(mm);
+		list_del_init(&mm->ppgtt_mm.lru_list);
+		invalidate_ppgtt_mm(mm);
 		return 1;
 	}
 	return 0;
@@ -1746,9 +1706,6 @@ static inline int ppgtt_get_next_level_e
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	struct intel_vgpu_ppgtt_spt *s;
 
-	if (WARN_ON(!mm->has_shadow_page_table))
-		return -EINVAL;
-
 	s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e));
 	if (!s)
 		return -ENXIO;
@@ -1780,78 +1737,65 @@ unsigned long intel_vgpu_gma_to_gpa(stru
 	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
 	unsigned long gma_index[4];
 	struct intel_gvt_gtt_entry e;
-	int i, index;
+	int i, levels = 0;
 	int ret;
 
-	if (mm->type != INTEL_GVT_MM_GGTT && mm->type != INTEL_GVT_MM_PPGTT)
-		return INTEL_GVT_INVALID_ADDR;
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
+		   mm->type != INTEL_GVT_MM_PPGTT);
 
 	if (mm->type == INTEL_GVT_MM_GGTT) {
 		if (!vgpu_gmadr_is_valid(vgpu, gma))
 			goto err;
 
-		ret = ggtt_get_guest_entry(mm, &e,
-				gma_ops->gma_to_ggtt_pte_index(gma));
-		if (ret)
-			goto err;
+		ggtt_get_guest_entry(mm, &e,
+			gma_ops->gma_to_ggtt_pte_index(gma));
+
 		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
 			+ (gma & ~I915_GTT_PAGE_MASK);
 
 		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
-		return gpa;
-	}
-
-	switch (mm->page_table_level) {
-	case 4:
-		ret = ppgtt_get_shadow_root_entry(mm, &e, 0);
-		if (ret)
-			goto err;
-		gma_index[0] = gma_ops->gma_to_pml4_index(gma);
-		gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
-		gma_index[2] = gma_ops->gma_to_pde_index(gma);
-		gma_index[3] = gma_ops->gma_to_pte_index(gma);
-		index = 4;
-		break;
-	case 3:
-		ret = ppgtt_get_shadow_root_entry(mm, &e,
-				gma_ops->gma_to_l3_pdp_index(gma));
-		if (ret)
-			goto err;
-		gma_index[0] = gma_ops->gma_to_pde_index(gma);
-		gma_index[1] = gma_ops->gma_to_pte_index(gma);
-		index = 2;
-		break;
-	case 2:
-		ret = ppgtt_get_shadow_root_entry(mm, &e,
-				gma_ops->gma_to_pde_index(gma));
-		if (ret)
-			goto err;
-		gma_index[0] = gma_ops->gma_to_pte_index(gma);
-		index = 1;
-		break;
-	default:
-		WARN_ON(1);
-		goto err;
-	}
+	} else {
+		switch (mm->ppgtt_mm.root_entry_type) {
+		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
+			ppgtt_get_shadow_root_entry(mm, &e, 0);
+
+			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
+			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
+			gma_index[2] = gma_ops->gma_to_pde_index(gma);
+			gma_index[3] = gma_ops->gma_to_pte_index(gma);
+			levels = 4;
+			break;
+		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
+			ppgtt_get_shadow_root_entry(mm, &e,
+					gma_ops->gma_to_l3_pdp_index(gma));
+
+			gma_index[0] = gma_ops->gma_to_pde_index(gma);
+			gma_index[1] = gma_ops->gma_to_pte_index(gma);
+			levels = 2;
+			break;
+		default:
+			GEM_BUG_ON(1);
+		}
 
-	/* walk into the shadow page table and get gpa from guest entry */
-	for (i = 0; i < index; i++) {
-		ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
-			(i == index - 1));
-		if (ret)
-			goto err;
+		/* walk the shadow page table and get gpa from guest entry */
+		for (i = 0; i < levels; i++) {
+			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
+				(i == levels - 1));
+			if (ret)
+				goto err;
 
-		if (!pte_ops->test_present(&e)) {
-			gvt_dbg_core("GMA 0x%lx is not present\n", gma);
-			goto err;
+			if (!pte_ops->test_present(&e)) {
+				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
+				goto err;
+			}
 		}
-	}
 
-	gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
-		+ (gma & ~I915_GTT_PAGE_MASK);
+		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
+					(gma & ~I915_GTT_PAGE_MASK);
+		trace_gma_translate(vgpu->id, "ppgtt", 0,
+				    mm->ppgtt_mm.root_entry_type, gma, gpa);
+	}
 
-	trace_gma_translate(vgpu->id, "ppgtt", 0,
-			mm->page_table_level, gma, gpa);
 	return gpa;
 err:
 	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
@@ -2131,43 +2075,48 @@ err:
 int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
-	struct intel_vgpu_mm *ggtt_mm;
 
 	hash_init(gtt->tracked_guest_page_hash_table);
 	hash_init(gtt->shadow_page_hash_table);
 
-	INIT_LIST_HEAD(&gtt->mm_list_head);
+	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
 	INIT_LIST_HEAD(&gtt->oos_page_list_head);
 	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
 
-	intel_vgpu_reset_ggtt(vgpu);
-
-	ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT,
-			NULL, 1, 0);
-	if (IS_ERR(ggtt_mm)) {
+	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
+	if (IS_ERR(gtt->ggtt_mm)) {
 		gvt_vgpu_err("fail to create mm for ggtt.\n");
-		return PTR_ERR(ggtt_mm);
+		return PTR_ERR(gtt->ggtt_mm);
 	}
 
-	gtt->ggtt_mm = ggtt_mm;
+	intel_vgpu_reset_ggtt(vgpu);
 
 	return create_scratch_page_tree(vgpu);
 }
 
-static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type)
+static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 {
 	struct list_head *pos, *n;
 	struct intel_vgpu_mm *mm;
 
-	list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) {
-		mm = container_of(pos, struct intel_vgpu_mm, list);
-		if (mm->type == type) {
-			vgpu->gvt->gtt.mm_free_page_table(mm);
-			list_del(&mm->list);
-			list_del(&mm->lru_list);
-			kfree(mm);
-		}
+	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
+		intel_vgpu_destroy_mm(&mm->ref);
 	}
+
+	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
+		gvt_err("vgpu ppgtt mm is not fully destoried\n");
+
+	if (GEM_WARN_ON(!hlist_empty(vgpu->gtt.shadow_page_hash_table))) {
+		gvt_err("Why we still has spt not freed?\n");
+		ppgtt_free_all_shadow_page(vgpu);
+	}
+}
+
+static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
+{
+	intel_vgpu_destroy_mm(&vgpu->gtt.ggtt_mm->ref);
+	vgpu->gtt.ggtt_mm = NULL;
 }
 
 /**
@@ -2182,11 +2131,9 @@ static void intel_vgpu_free_mm(struct in
  */
 void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
 {
-	ppgtt_free_all_shadow_page(vgpu);
+	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
+	intel_vgpu_destroy_ggtt_mm(vgpu);
 	release_scratch_page_tree(vgpu);
-
-	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
-	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT);
 }
 
 static void clean_spt_oos(struct intel_gvt *gvt)
@@ -2248,32 +2195,26 @@ fail:
  * pointer to mm object on success, NULL if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level, void *root_entry)
+		u64 pdps[])
 {
-	struct list_head *pos;
 	struct intel_vgpu_mm *mm;
-	u64 *src, *dst;
-
-	list_for_each(pos, &vgpu->gtt.mm_list_head) {
-		mm = container_of(pos, struct intel_vgpu_mm, list);
-		if (mm->type != INTEL_GVT_MM_PPGTT)
-			continue;
-
-		if (mm->page_table_level != page_table_level)
-			continue;
+	struct list_head *pos;
 
-		src = root_entry;
-		dst = mm->virtual_page_table;
+	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
 
-		if (page_table_level == 3) {
-			if (src[0] == dst[0]
-					&& src[1] == dst[1]
-					&& src[2] == dst[2]
-					&& src[3] == dst[3])
+		switch (mm->ppgtt_mm.root_entry_type) {
+		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
+			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
 				return mm;
-		} else {
-			if (src[0] == dst[0])
+			break;
+		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
+			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
+				    sizeof(mm->ppgtt_mm.guest_pdps)))
 				return mm;
+			break;
+		default:
+			GEM_BUG_ON(1);
 		}
 	}
 	return NULL;
@@ -2283,7 +2224,8 @@ struct intel_vgpu_mm *intel_vgpu_find_pp
  * intel_vgpu_g2v_create_ppgtt_mm - create a PPGTT mm object from
  * g2v notification
  * @vgpu: a vGPU
- * @page_table_level: PPGTT page table level
+ * @root_entry_type: ppgtt root entry type
+ * @pdps: guest pdps
  *
  * This function is used to create a PPGTT mm object from a guest to GVT-g
  * notification.
@@ -2292,20 +2234,15 @@ struct intel_vgpu_mm *intel_vgpu_find_pp
  * Zero on success, negative error code if failed.
  */
 int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level)
+		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
 {
-	u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
 	struct intel_vgpu_mm *mm;
 
-	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
-		return -EINVAL;
-
-	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
+	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
 	if (mm) {
 		intel_gvt_mm_reference(mm);
 	} else {
-		mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT,
-				pdp, page_table_level, 0);
+		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
 		if (IS_ERR(mm)) {
 			gvt_vgpu_err("fail to create mm\n");
 			return PTR_ERR(mm);
@@ -2318,7 +2255,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struc
  * intel_vgpu_g2v_destroy_ppgtt_mm - destroy a PPGTT mm object from
  * g2v notification
  * @vgpu: a vGPU
- * @page_table_level: PPGTT page table level
+ * @pdps: guest pdps
  *
  * This function is used to create a PPGTT mm object from a guest to GVT-g
  * notification.
@@ -2327,15 +2264,11 @@ int intel_vgpu_g2v_create_ppgtt_mm(struc
  * Zero on success, negative error code if failed.
  */
 int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level)
+		u64 pdps[])
 {
-	u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
 	struct intel_vgpu_mm *mm;
 
-	if (WARN_ON((page_table_level != 4) && (page_table_level != 3)))
-		return -EINVAL;
-
-	mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp);
+	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
 	if (!mm) {
 		gvt_vgpu_err("fail to find ppgtt instance.\n");
 		return -EINVAL;
@@ -2367,8 +2300,6 @@ int intel_gvt_init_gtt(struct intel_gvt
 		|| IS_KABYLAKE(gvt->dev_priv)) {
 		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
 		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
-		gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
-		gvt->gtt.mm_free_page_table = gen8_mm_free_page_table;
 	} else {
 		return -ENODEV;
 	}
@@ -2399,7 +2330,7 @@ int intel_gvt_init_gtt(struct intel_gvt
 			return ret;
 		}
 	}
-	INIT_LIST_HEAD(&gvt->gtt.mm_lru_list_head);
+	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
 	return 0;
 }
 
@@ -2471,13 +2402,10 @@ void intel_vgpu_reset_ggtt(struct intel_
  */
 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
 {
-	ppgtt_free_all_shadow_page(vgpu);
-
 	/* Shadow pages are only created when there is no page
 	 * table tracking data, so remove page tracking data after
 	 * removing the shadow pages.
 	 */
-	intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT);
-
+	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
 	intel_vgpu_reset_ggtt(vgpu);
 }
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -84,17 +84,12 @@ struct intel_gvt_gtt {
 	void (*mm_free_page_table)(struct intel_vgpu_mm *mm);
 	struct list_head oos_page_use_list_head;
 	struct list_head oos_page_free_list_head;
-	struct list_head mm_lru_list_head;
+	struct list_head ppgtt_mm_lru_list_head;
 
 	struct page *scratch_page;
 	unsigned long scratch_mfn;
 };
 
-enum {
-	INTEL_GVT_MM_GGTT = 0,
-	INTEL_GVT_MM_PPGTT,
-};
-
 typedef enum {
 	GTT_TYPE_INVALID = -1,
 
@@ -125,26 +120,39 @@ typedef enum {
 	GTT_TYPE_MAX,
 } intel_gvt_gtt_type_t;
 
-struct intel_vgpu_mm {
-	int type;
-	bool initialized;
-	bool shadowed;
+enum intel_gvt_mm_type {
+	INTEL_GVT_MM_GGTT,
+	INTEL_GVT_MM_PPGTT,
+};
 
-	int page_table_entry_type;
-	u32 page_table_entry_size;
-	u32 page_table_entry_cnt;
-	void *virtual_page_table;
-	void *shadow_page_table;
-
-	int page_table_level;
-	bool has_shadow_page_table;
-	u32 pde_base_index;
+#define GVT_RING_CTX_NR_PDPS	GEN8_3LVL_PDPES
+
+struct intel_vgpu_mm {
+	enum intel_gvt_mm_type type;
+	struct intel_vgpu *vgpu;
 
-	struct list_head list;
 	struct kref ref;
 	atomic_t pincount;
-	struct list_head lru_list;
-	struct intel_vgpu *vgpu;
+
+	union {
+		struct {
+			intel_gvt_gtt_type_t root_entry_type;
+			/*
+			 * The 4 PDPs in ring context. For 48bit addressing,
+			 * only PDP0 is valid and point to PML4. For 32it
+			 * addressing, all 4 are used as true PDPs.
+			 */
+			u64 guest_pdps[GVT_RING_CTX_NR_PDPS];
+			u64 shadow_pdps[GVT_RING_CTX_NR_PDPS];
+			bool shadowed;
+
+			struct list_head list;
+			struct list_head lru_list;
+		} ppgtt_mm;
+		struct {
+			void *virtual_ggtt;
+		} ggtt_mm;
+	};
 };
 
 extern int intel_vgpu_mm_get_entry(
@@ -158,32 +166,31 @@ extern int intel_vgpu_mm_set_entry(
 		unsigned long index);
 
 #define ggtt_get_guest_entry(mm, e, index) \
-	intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index)
+	intel_vgpu_mm_get_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index)
 
 #define ggtt_set_guest_entry(mm, e, index) \
-	intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index)
+	intel_vgpu_mm_set_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index)
 
 #define ggtt_get_shadow_entry(mm, e, index) \
-	intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index)
+	intel_vgpu_mm_get_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index)
 
 #define ggtt_set_shadow_entry(mm, e, index) \
-	intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index)
+	intel_vgpu_mm_set_entry(mm, mm->ggtt_mm.virtual_ggtt, e, index)
 
 #define ppgtt_get_guest_root_entry(mm, e, index) \
-	intel_vgpu_mm_get_entry(mm, mm->virtual_page_table, e, index)
+	intel_vgpu_mm_get_entry(mm, mm->ppgtt_mm.guest_pdps, e, index)
 
 #define ppgtt_set_guest_root_entry(mm, e, index) \
-	intel_vgpu_mm_set_entry(mm, mm->virtual_page_table, e, index)
+	intel_vgpu_mm_set_entry(mm, mm->ppgtt_mm.guest_pdps, e, index)
 
 #define ppgtt_get_shadow_root_entry(mm, e, index) \
-	intel_vgpu_mm_get_entry(mm, mm->shadow_page_table, e, index)
+	intel_vgpu_mm_get_entry(mm, mm->ppgtt_mm.shadow_pdps, e, index)
 
 #define ppgtt_set_shadow_root_entry(mm, e, index) \
-	intel_vgpu_mm_set_entry(mm, mm->shadow_page_table, e, index)
+	intel_vgpu_mm_set_entry(mm, mm->ppgtt_mm.shadow_pdps, e, index)
 
-extern struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu,
-		int mm_type, void *virtual_page_table, int page_table_level,
-		u32 pde_base_index);
+struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
+		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
 extern void intel_vgpu_destroy_mm(struct kref *mm_ref);
 
 struct intel_vgpu_guest_page;
@@ -196,7 +203,7 @@ struct intel_vgpu_scratch_pt {
 struct intel_vgpu_gtt {
 	struct intel_vgpu_mm *ggtt_mm;
 	unsigned long active_ppgtt_mm_bitmap;
-	struct list_head mm_list_head;
+	struct list_head ppgtt_mm_list_head;
 	DECLARE_HASHTABLE(shadow_page_hash_table, INTEL_GVT_GTT_HASH_BITS);
 	DECLARE_HASHTABLE(tracked_guest_page_hash_table, INTEL_GVT_GTT_HASH_BITS);
 	atomic_t n_tracked_guest_page;
@@ -294,13 +301,12 @@ unsigned long intel_vgpu_gma_to_gpa(stru
 		unsigned long gma);
 
 struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level, void *root_entry);
+		u64 pdps[]);
 
 int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level);
+		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
 
-int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level);
+int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
 
 int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu,
 	unsigned int off, void *p_data, unsigned int bytes);
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1139,20 +1139,27 @@ static int pvinfo_mmio_read(struct intel
 
 static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
 {
+	u64 *pdps;
 	int ret = 0;
 
+	pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
+
 	switch (notification) {
 	case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE:
-		ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 3);
+		ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu,
+				GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
+				pdps);
 		break;
 	case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
-		ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 3);
+		ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, pdps);
 		break;
 	case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE:
-		ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu, 4);
+		ret = intel_vgpu_g2v_create_ppgtt_mm(vgpu,
+				GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
+				pdps);
 		break;
 	case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY:
-		ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, 4);
+		ret = intel_vgpu_g2v_destroy_ppgtt_mm(vgpu, pdps);
 		break;
 	case VGT_G2V_EXECLIST_CONTEXT_CREATE:
 	case VGT_G2V_EXECLIST_CONTEXT_DESTROY:
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -76,10 +76,9 @@ static void failsafe_emulate_mmio_rw(str
 		else
 			intel_vgpu_default_mmio_write(vgpu, offset, p_data,
 					bytes);
-	} else if (reg_is_gtt(gvt, offset) &&
-			vgpu->gtt.ggtt_mm->virtual_page_table) {
+	} else if (reg_is_gtt(gvt, offset)) {
 		offset -= gvt->device_info.gtt_start_offset;
-		pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset;
+		pt = vgpu->gtt.ggtt_mm->ggtt_mm.virtual_ggtt + offset;
 		if (read)
 			memcpy(p_data, pt, bytes);
 		else
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -162,7 +162,7 @@ static int populate_shadow_context(struc
 #undef COPY_REG
 
 	set_context_pdp_root_pointer(shadow_ring_context,
-				     workload->shadow_mm->shadow_page_table);
+				     (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
 
 	intel_gvt_hypervisor_read_gpa(vgpu,
 			workload->ring_context_gpa +
@@ -1239,27 +1239,30 @@ static int prepare_mm(struct intel_vgpu_
 	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
 	struct intel_vgpu_mm *mm;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	int page_table_level;
-	u32 pdp[8];
+	intel_gvt_gtt_type_t root_entry_type;
+	u64 pdps[GVT_RING_CTX_NR_PDPS];
 
-	if (desc->addressing_mode == 1) { /* legacy 32-bit */
-		page_table_level = 3;
-	} else if (desc->addressing_mode == 3) { /* legacy 64 bit */
-		page_table_level = 4;
-	} else {
+	switch (desc->addressing_mode) {
+	case 1: /* legacy 32-bit */
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
+		break;
+	case 3: /* legacy 64-bit */
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+		break;
+	default:
 		gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
 		return -EINVAL;
 	}
 
-	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
+	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps);
 
-	mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
+	mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, pdps);
 	if (mm) {
 		intel_gvt_mm_reference(mm);
 	} else {
 
-		mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
-				pdp, page_table_level, 0);
+		mm = intel_vgpu_create_ppgtt_mm(workload->vgpu, root_entry_type,
+						pdps);
 		if (IS_ERR(mm)) {
 			gvt_vgpu_err("fail to create mm object.\n");
 			return PTR_ERR(mm);
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -113,10 +113,10 @@ TRACE_EVENT(gma_index,
 );
 
 TRACE_EVENT(gma_translate,
-	TP_PROTO(int id, char *type, int ring_id, int pt_level,
+	TP_PROTO(int id, char *type, int ring_id, int root_entry_type,
 		unsigned long gma, unsigned long gpa),
 
-	TP_ARGS(id, type, ring_id, pt_level, gma, gpa),
+	TP_ARGS(id, type, ring_id, root_entry_type, gma, gpa),
 
 	TP_STRUCT__entry(
 		__array(char, buf, MAX_BUF_LEN)
@@ -124,8 +124,8 @@ TRACE_EVENT(gma_translate,
 
 	TP_fast_assign(
 		snprintf(__entry->buf, MAX_BUF_LEN,
-			"VM%d %s ring %d pt_level %d gma 0x%lx -> gpa 0x%lx\n",
-				id, type, ring_id, pt_level, gma, gpa);
+			"VM%d %s ring %d root_entry_type %d gma 0x%lx -> gpa 0x%lx\n",
+			id, type, ring_id, root_entry_type, gma, gpa);
 	),
 
 	TP_printk("%s", __entry->buf)