From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 4 May 2020 14:54:09 +0200
Subject: iommu/amd: Fix race in increase_address_space()/fetch_pte()
Git-commit: eb791aa70b90c559eeb371d807c8813d569393f0
Patch-mainline: v5.7-rc5
References: bsc#1172065
The 'pt_root' and 'mode' struct members of 'struct protection_domain'
need to be get/set atomically, otherwise the page-table of the domain
can get corrupted.
Merge the fields into one atomic64_t struct member which can be
get/set atomically.
Fixes: 92d420ec028d ("iommu/amd: Relax locking in dma_ops path")
Reported-by: Qian Cai <cai@lca.pw>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Tested-by: Qian Cai <cai@lca.pw>
Link: https://lore.kernel.org/r/20200504125413.16798-2-joro@8bytes.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/amd_iommu.c | 140 ++++++++++++++++++++++++++++++----------
drivers/iommu/amd_iommu_types.h | 9 ++-
2 files changed, 112 insertions(+), 37 deletions(-)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -193,6 +193,26 @@ static struct dma_ops_domain* to_dma_ops
return container_of(domain, struct dma_ops_domain, domain);
}
+static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
+ struct domain_pgtable *pgtable)
+{
+ u64 pt_root = atomic64_read(&domain->pt_root);
+
+ pgtable->root = (u64 *)(pt_root & PAGE_MASK);
+ pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ return pt_root;
+}
+
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -1447,13 +1467,18 @@ static struct page *free_sub_pt(unsigned
static void free_pagetable(struct protection_domain *domain)
{
- unsigned long root = (unsigned long)domain->pt_root;
+ struct domain_pgtable pgtable;
struct page *freelist = NULL;
+ unsigned long root;
+
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ atomic64_set(&domain->pt_root, 0);
- BUG_ON(domain->mode < PAGE_MODE_NONE ||
- domain->mode > PAGE_MODE_6_LEVEL);
+ BUG_ON(pgtable.mode < PAGE_MODE_NONE ||
+ pgtable.mode > PAGE_MODE_6_LEVEL);
- freelist = free_sub_pt(root, domain->mode, freelist);
+ root = (unsigned long)pgtable.root;
+ freelist = free_sub_pt(root, pgtable.mode, freelist);
free_page_list(freelist);
}
@@ -1467,24 +1492,28 @@ static bool increase_address_space(struc
unsigned long address,
gfp_t gfp)
{
+ struct domain_pgtable pgtable;
unsigned long flags;
bool ret = false;
- u64 *pte;
+ u64 *pte, root;
spin_lock_irqsave(&domain->lock, flags);
- if (address <= PM_LEVEL_SIZE(domain->mode) ||
- WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address <= PM_LEVEL_SIZE(pgtable.mode) ||
+ WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
goto out;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
goto out;
- *pte = PM_LEVEL_PDE(domain->mode,
- iommu_virt_to_phys(domain->pt_root));
- domain->pt_root = pte;
- domain->mode += 1;
+ *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
+
+ root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode + 1);
+
+ atomic64_set(&domain->pt_root, root);
ret = true;
@@ -1501,16 +1530,22 @@ static u64 *alloc_pte(struct protection_
gfp_t gfp,
bool *updated)
{
+ struct domain_pgtable pgtable;
int level, end_lvl;
u64 *pte, *page;
BUG_ON(!is_power_of_2(page_size));
- while (address > PM_LEVEL_SIZE(domain->mode))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ while (address > PM_LEVEL_SIZE(pgtable.mode)) {
*updated = increase_address_space(domain, address, gfp) || *updated;
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ }
+
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
address = PAGE_SIZE_ALIGN(address, page_size);
end_lvl = PAGE_SIZE_LEVEL(page_size);
@@ -1586,16 +1621,19 @@ static u64 *fetch_pte(struct protection_
unsigned long address,
unsigned long *page_size)
{
+ struct domain_pgtable pgtable;
int level;
u64 *pte;
*page_size = 0;
- if (address > PM_LEVEL_SIZE(domain->mode))
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (address > PM_LEVEL_SIZE(pgtable.mode))
return NULL;
- level = domain->mode - 1;
- pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
+ level = pgtable.mode - 1;
+ pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
*page_size = PTE_LEVEL_PAGE_SIZE(level);
while (level > 0) {
@@ -1911,7 +1949,9 @@ static void dma_ops_domain_free(struct d
*/
static struct dma_ops_domain *dma_ops_domain_alloc(void)
{
+ struct protection_domain *domain;
struct dma_ops_domain *dma_dom;
+ u64 *pt_root, root;
dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
if (!dma_dom)
@@ -1920,12 +1960,16 @@ static struct dma_ops_domain *dma_ops_do
if (protection_domain_init(&dma_dom->domain))
goto free_dma_dom;
- dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
- dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- dma_dom->domain.flags = PD_DMA_OPS_MASK;
- if (!dma_dom->domain.pt_root)
+ domain = &dma_dom->domain;
+
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root)
goto free_dma_dom;
+ root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+ atomic64_set(&domain->pt_root, root);
+ domain->flags = PD_DMA_OPS_MASK;
+
init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
@@ -1954,14 +1998,17 @@ static bool dma_ops_domain(struct protec
static void set_dte_entry(u16 devid, struct protection_domain *domain,
bool ats, bool ppr)
{
+ struct domain_pgtable pgtable;
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (domain->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(domain->pt_root);
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (pgtable.mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(pgtable.root);
- pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (pgtable.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -2968,6 +3015,7 @@ static struct iommu_domain *amd_iommu_do
{
struct protection_domain *pdomain;
struct dma_ops_domain *dma_domain;
+ u64 *pt_root, root;
switch (type) {
case IOMMU_DOMAIN_UNMANAGED:
@@ -2975,13 +3023,15 @@ static struct iommu_domain *amd_iommu_do
if (!pdomain)
return NULL;
- pdomain->mode = PAGE_MODE_3_LEVEL;
- pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pdomain->pt_root) {
+ pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!pt_root) {
protection_domain_free(pdomain);
return NULL;
}
+ root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL);
+ atomic64_set(&pdomain->pt_root, root);
+
pdomain->domain.geometry.aperture_start = 0;
pdomain->domain.geometry.aperture_end = ~0ULL;
pdomain->domain.geometry.force_aperture = true;
@@ -3000,7 +3050,7 @@ static struct iommu_domain *amd_iommu_do
if (!pdomain)
return NULL;
- pdomain->mode = PAGE_MODE_NONE;
+ atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE);
break;
default:
return NULL;
@@ -3013,6 +3063,7 @@ static void amd_iommu_domain_free(struct
{
struct protection_domain *domain;
struct dma_ops_domain *dma_dom;
+ struct domain_pgtable pgtable;
domain = to_pdomain(dom);
@@ -3031,7 +3082,9 @@ static void amd_iommu_domain_free(struct
dma_ops_domain_free(dma_dom);
break;
default:
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
+ if (pgtable.mode != PAGE_MODE_NONE)
free_pagetable(domain);
if (domain->flags & PD_IOMMUV2_MASK)
@@ -3112,10 +3165,12 @@ static int amd_iommu_map(struct iommu_do
phys_addr_t paddr, size_t page_size, int iommu_prot)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
int prot = 0;
int ret;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -3136,9 +3191,11 @@ static size_t amd_iommu_unmap(struct iom
size_t page_size)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
size_t unmap_size;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return 0;
mutex_lock(&domain->api_lock);
@@ -3153,9 +3210,11 @@ static phys_addr_t amd_iommu_iova_to_phy
{
struct protection_domain *domain = to_pdomain(dom);
unsigned long offset_mask, pte_pgsize;
+ struct domain_pgtable pgtable;
u64 *pte, __pte;
- if (domain->mode == PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode == PAGE_MODE_NONE)
return iova;
pte = fetch_pte(domain, iova, &pte_pgsize);
@@ -3330,16 +3389,26 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_n
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
+ struct domain_pgtable pgtable;
unsigned long flags;
+ u64 pt_root;
spin_lock_irqsave(&domain->lock, flags);
+ /* First save pgtable configuration*/
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+
/* Update data structure */
- domain->mode = PAGE_MODE_NONE;
+ pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE);
+ atomic64_set(&domain->pt_root, pt_root);
/* Make changes visible to IOMMUs */
update_domain(domain);
+ /* Restore old pgtable in domain->ptroot to free page-table */
+ pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode);
+ atomic64_set(&domain->pt_root, pt_root);
+
/* Page-table is not visible to IOMMU anymore, so free it */
free_pagetable(domain);
@@ -3530,9 +3599,11 @@ static u64 *__get_gcr3_pte(u64 *root, in
static int __set_gcr3(struct protection_domain *domain, int pasid,
unsigned long cr3)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -3546,9 +3617,11 @@ static int __set_gcr3(struct protection_
static int __clear_gcr3(struct protection_domain *domain, int pasid)
{
+ struct domain_pgtable pgtable;
u64 *pte;
- if (domain->mode != PAGE_MODE_NONE)
+ amd_iommu_domain_get_pgtable(domain, &pgtable);
+ if (pgtable.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -470,8 +470,7 @@ struct protection_domain {
spinlock_t lock; /* mostly used to lock the page table*/
struct mutex api_lock; /* protect page tables in the iommu-api path */
u16 id; /* the domain id written to the device table */
- int mode; /* paging mode (0-6 levels) */
- u64 *pt_root; /* page table root pointer */
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -479,6 +478,12 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
+/* For decocded pt_root */
+struct domain_pgtable {
+ int mode;
+ u64 *root;
+};
+
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.