diff --git a/patches.suse/RDMA-device-Fix-a-race-between-mad_client-and-cm_cli.patch b/patches.suse/RDMA-device-Fix-a-race-between-mad_client-and-cm_cli.patch new file mode 100644 index 0000000..4827f96 --- /dev/null +++ b/patches.suse/RDMA-device-Fix-a-race-between-mad_client-and-cm_cli.patch @@ -0,0 +1,133 @@ +From 7a8bccd8b29c321ac181369b42b04fecf05f98e2 Mon Sep 17 00:00:00 2001 +From: Shifeng Li +Date: Fri, 2 Feb 2024 19:53:13 -0800 +Subject: [PATCH 1/1] RDMA/device: Fix a race between mad_client and cm_client + init +Git-commit: 7a8bccd8b29c321ac181369b42b04fecf05f98e2 +Patch-mainline: v6.9-rc1 +References: git-fixes + +The mad_client will be initialized in enable_device_and_get(), while the +devices_rwsem will be downgraded to a read semaphore. There is a window +that leads to the failed initialization for cm_client, since it can not +get matched mad port from ib_mad_port_list, and the matched mad port will +be added to the list after that. + + mad_client | cm_client +------------------|-------------------------------------------------------- +ib_register_device| +enable_device_and_get +down_write(&devices_rwsem) +xa_set_mark(&devices, DEVICE_REGISTERED) +downgrade_write(&devices_rwsem) + | + |ib_cm_init + |ib_register_client(&cm_client) + |down_read(&devices_rwsem) + |xa_for_each_marked (&devices, DEVICE_REGISTERED) + |add_client_context + |cm_add_one + |ib_register_mad_agent + |ib_get_mad_port + |__ib_get_mad_port + |list_for_each_entry(entry, &ib_mad_port_list, port_list) + |return NULL + |up_read(&devices_rwsem) + | +add_client_context| +ib_mad_init_device| +ib_mad_port_open | +list_add_tail(&port_priv->port_list, &ib_mad_port_list) +up_read(&devices_rwsem) + | + +Fix it by using down_write(&devices_rwsem) in ib_register_client(). 
+ +Fixes: d0899892edd0 ("RDMA/device: Provide APIs from the core code to help unregistration") +Link: https://lore.kernel.org/r/20240203035313.98991-1-lishifeng@sangfor.com.cn +Suggested-by: Jason Gunthorpe +Signed-off-by: Shifeng Li +Signed-off-by: Jason Gunthorpe +Acked-by: Nicolas Morey +--- + drivers/infiniband/core/device.c | 37 +++++++++++++++++++------------- + 1 file changed, 22 insertions(+), 15 deletions(-) + +diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c +index 67bcea7a153c..07cb6c5ffda0 100644 +--- a/drivers/infiniband/core/device.c ++++ b/drivers/infiniband/core/device.c +@@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client) + { + int ret; + +- down_write(&clients_rwsem); ++ lockdep_assert_held(&clients_rwsem); + /* + * The add/remove callbacks must be called in FIFO/LIFO order. To + * achieve this we assign client_ids so they are sorted in +@@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client) + client->client_id = highest_client_id; + ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL); + if (ret) +- goto out; ++ return ret; + + highest_client_id++; + xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED); +- +-out: +- up_write(&clients_rwsem); +- return ret; ++ return 0; + } + + static void remove_client_id(struct ib_client *client) +@@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client) + { + struct ib_device *device; + unsigned long index; ++ bool need_unreg = false; + int ret; + + refcount_set(&client->uses, 1); + init_completion(&client->uses_zero); ++ ++ /* ++ * The devices_rwsem is held in write mode to ensure that a racing ++ * ib_register_device() sees a consistent view of clients and devices. 
++ */ ++ down_write(&devices_rwsem); ++ down_write(&clients_rwsem); + ret = assign_client_id(client); + if (ret) +- return ret; ++ goto out; + +- down_read(&devices_rwsem); ++ need_unreg = true; + xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) { + ret = add_client_context(device, client); +- if (ret) { +- up_read(&devices_rwsem); +- ib_unregister_client(client); +- return ret; +- } ++ if (ret) ++ goto out; + } +- up_read(&devices_rwsem); +- return 0; ++ ret = 0; ++out: ++ up_write(&clients_rwsem); ++ up_write(&devices_rwsem); ++ if (need_unreg && ret) ++ ib_unregister_client(client); ++ return ret; + } + EXPORT_SYMBOL(ib_register_client); + +-- +2.39.1.1.gbe015eda0162 + diff --git a/patches.suse/RDMA-hns-Fix-mis-modifying-default-congestion-contro.patch b/patches.suse/RDMA-hns-Fix-mis-modifying-default-congestion-contro.patch new file mode 100644 index 0000000..3bb3c29 --- /dev/null +++ b/patches.suse/RDMA-hns-Fix-mis-modifying-default-congestion-contro.patch @@ -0,0 +1,134 @@ +From d20a7cf9f714f0763efb56f0f2eeca1cb91315ed Mon Sep 17 00:00:00 2001 +From: Luoyouming +Date: Mon, 19 Feb 2024 14:18:05 +0800 +Subject: [PATCH 1/1] RDMA/hns: Fix mis-modifying default congestion control + algorithm +Git-commit: d20a7cf9f714f0763efb56f0f2eeca1cb91315ed +Patch-mainline: v6.9-rc1 +References: git-fixes + +Commit 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured +with DCQCN") adds a check of congestion control algorithm for UD. But +that patch causes a problem: hr_dev->caps.cong_type is global, +used by all QPs, so modifying this field to DCQCN for UD QPs causes +other QPs unable to use any other algorithm except DCQCN. + +Revert the modification in commit 27c5fd271d8b ("RDMA/hns: The UD +mode can only be configured with DCQCN"). Add a new field cong_type +to struct hns_roce_qp and configure DCQCN for UD QPs. 
+ +Fixes: 27c5fd271d8b ("RDMA/hns: The UD mode can only be configured with DCQCN") +Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") +Signed-off-by: Luoyouming +Signed-off-by: Junxian Huang +Link: https://lore.kernel.org/r/20240219061805.668170-1-huangjunxian6@hisilicon.com +Signed-off-by: Leon Romanovsky +Acked-by: Nicolas Morey +--- + drivers/infiniband/hw/hns/hns_roce_device.h | 17 +++++++++-------- + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16 ++++++++++------ + 2 files changed, 19 insertions(+), 14 deletions(-) + +diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h +index 1a8516019516..1d062c522d69 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_device.h ++++ b/drivers/infiniband/hw/hns/hns_roce_device.h +@@ -638,6 +638,13 @@ enum { + HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5), + }; + ++enum hns_roce_cong_type { ++ CONG_TYPE_DCQCN, ++ CONG_TYPE_LDCP, ++ CONG_TYPE_HC3, ++ CONG_TYPE_DIP, ++}; ++ + struct hns_roce_qp { + struct ib_qp ibqp; + struct hns_roce_wq rq; +@@ -682,6 +689,7 @@ struct hns_roce_qp { + struct list_head node; /* all qps are on a list */ + struct list_head rq_node; /* all recv qps are on a list */ + struct list_head sq_node; /* all send qps are on a list */ ++ enum hns_roce_cong_type cong_type; + }; + + struct hns_roce_ib_iboe { +@@ -750,13 +758,6 @@ struct hns_roce_eq_table { + void __iomem **eqc_base; /* only for hw v1 */ + }; + +-enum cong_type { +- CONG_TYPE_DCQCN, +- CONG_TYPE_LDCP, +- CONG_TYPE_HC3, +- CONG_TYPE_DIP, +-}; +- + struct hns_roce_caps { + u64 fw_ver; + u8 num_ports; +@@ -844,7 +845,7 @@ struct hns_roce_caps { + u16 default_aeq_period; + u16 default_aeq_arm_st; + u16 default_ceq_arm_st; +- enum cong_type cong_type; ++ enum hns_roce_cong_type cong_type; + }; + + struct hns_roce_dfx_hw { +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +index de56dc6e3226..42e28586cefa 100644 +--- 
a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -4738,12 +4738,15 @@ static int check_cong_type(struct ib_qp *ibqp, + struct hns_roce_congestion_algorithm *cong_alg) + { + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); ++ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + +- if (ibqp->qp_type == IB_QPT_UD) +- hr_dev->caps.cong_type = CONG_TYPE_DCQCN; ++ if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI) ++ hr_qp->cong_type = CONG_TYPE_DCQCN; ++ else ++ hr_qp->cong_type = hr_dev->caps.cong_type; + + /* different congestion types match different configurations */ +- switch (hr_dev->caps.cong_type) { ++ switch (hr_qp->cong_type) { + case CONG_TYPE_DCQCN: + cong_alg->alg_sel = CONG_DCQCN; + cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; +@@ -4771,8 +4774,8 @@ static int check_cong_type(struct ib_qp *ibqp, + default: + ibdev_warn(&hr_dev->ib_dev, + "invalid type(%u) for congestion selection.\n", +- hr_dev->caps.cong_type); +- hr_dev->caps.cong_type = CONG_TYPE_DCQCN; ++ hr_qp->cong_type); ++ hr_qp->cong_type = CONG_TYPE_DCQCN; + cong_alg->alg_sel = CONG_DCQCN; + cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; + cong_alg->dip_vld = DIP_INVALID; +@@ -4791,6 +4794,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + struct hns_roce_congestion_algorithm cong_field; + struct ib_device *ibdev = ibqp->device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); ++ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + u32 dip_idx = 0; + int ret; + +@@ -4803,7 +4807,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + return ret; + + hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id + +- hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE); ++ hr_qp->cong_type * HNS_ROCE_CONG_SIZE); + hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID); + hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel); + hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL); 
+-- +2.39.1.1.gbe015eda0162 + diff --git a/patches.suse/RDMA-irdma-Remove-duplicate-assignment.patch b/patches.suse/RDMA-irdma-Remove-duplicate-assignment.patch new file mode 100644 index 0000000..5cf7f2c --- /dev/null +++ b/patches.suse/RDMA-irdma-Remove-duplicate-assignment.patch @@ -0,0 +1,46 @@ +From 926e8ea4b8dac84f6d14a4b60d0653f1f2ba9431 Mon Sep 17 00:00:00 2001 +From: Mustafa Ismail +Date: Wed, 31 Jan 2024 17:39:53 -0600 +Subject: [PATCH 1/1] RDMA/irdma: Remove duplicate assignment +Git-commit: 926e8ea4b8dac84f6d14a4b60d0653f1f2ba9431 +Patch-mainline: v6.9-rc1 +References: git-fixes + +Remove the unneeded assignment of the qp_num which is already +set in irdma_create_qp(). + +Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") +Signed-off-by: Mustafa Ismail +Signed-off-by: Shiraz Saleem +Signed-off-by: Sindhu Devale +Link: https://lore.kernel.org/r/20240131233953.400483-1-sindhu.devale@intel.com +Signed-off-by: Leon Romanovsky +Acked-by: Nicolas Morey +--- + drivers/infiniband/hw/irdma/verbs.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c +index b5eb8d421988..44b03bc061fa 100644 +--- a/drivers/infiniband/hw/irdma/verbs.c ++++ b/drivers/infiniband/hw/irdma/verbs.c +@@ -642,7 +642,6 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, + info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->sq_size = sqdepth >> sqshift; + ukinfo->rq_size = rqdepth >> rqshift; +- ukinfo->qp_id = iwqp->ibqp.qp_num; + + init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift; + init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift; +@@ -872,7 +871,7 @@ static int irdma_create_qp(struct ib_qp *ibqp, + iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE; + + init_info.pd = &iwpd->sc_pd; +- init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num; ++ init_info.qp_uk_init_info.qp_id = qp_num; + if 
(!rdma_protocol_roce(&iwdev->ibdev, 1)) + init_info.qp_uk_init_info.first_sq_wq = 1; + iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp; +-- +2.39.1.1.gbe015eda0162 + diff --git a/patches.suse/RDMA-rtrs-clt-Check-strnlen-return-len-in-sysfs-mpat.patch b/patches.suse/RDMA-rtrs-clt-Check-strnlen-return-len-in-sysfs-mpat.patch new file mode 100644 index 0000000..771d977 --- /dev/null +++ b/patches.suse/RDMA-rtrs-clt-Check-strnlen-return-len-in-sysfs-mpat.patch @@ -0,0 +1,41 @@ +From 7a7b7f575a25aa68ee934ee8107294487efcb3fe Mon Sep 17 00:00:00 2001 +From: Alexey Kodanev +Date: Wed, 21 Feb 2024 11:32:04 +0000 +Subject: [PATCH 1/1] RDMA/rtrs-clt: Check strnlen return len in sysfs + mpath_policy_store() +Git-commit: 7a7b7f575a25aa68ee934ee8107294487efcb3fe +Patch-mainline: v6.9-rc1 +References: git-fixes + +strnlen() may return 0 (e.g. for "\0\n" string), it's better to +check the result of strnlen() before using 'len - 1' expression +for the 'buf' array index. + +Detected using the static analysis tool - Svace. 
+ +Fixes: dc3b66a0ce70 ("RDMA/rtrs-clt: Add a minimum latency multipath policy") +Signed-off-by: Alexey Kodanev +Link: https://lore.kernel.org/r/20240221113204.147478-1-aleksei.kodanev@bell-sw.com +Acked-by: Jack Wang +Signed-off-by: Leon Romanovsky +Acked-by: Nicolas Morey +--- + drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +index d3c436ead694..4aa80c9388f0 100644 +--- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c ++++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +@@ -133,7 +133,7 @@ static ssize_t mpath_policy_store(struct device *dev, + + /* distinguish "mi" and "min-latency" with length */ + len = strnlen(buf, NAME_MAX); +- if (buf[len - 1] == '\n') ++ if (len && buf[len - 1] == '\n') + len--; + + if (!strncasecmp(buf, "round-robin", 11) || +-- +2.39.1.1.gbe015eda0162 + diff --git a/patches.suse/RDMA-srpt-Do-not-register-event-handler-until-srpt-d.patch b/patches.suse/RDMA-srpt-Do-not-register-event-handler-until-srpt-d.patch new file mode 100644 index 0000000..f4e67b9 --- /dev/null +++ b/patches.suse/RDMA-srpt-Do-not-register-event-handler-until-srpt-d.patch @@ -0,0 +1,60 @@ +From c21a8870c98611e8f892511825c9607f1e2cd456 Mon Sep 17 00:00:00 2001 +From: William Kucharski +Date: Fri, 2 Feb 2024 02:15:49 -0700 +Subject: [PATCH 1/1] RDMA/srpt: Do not register event handler until srpt + device is fully setup +Git-commit: c21a8870c98611e8f892511825c9607f1e2cd456 +Patch-mainline: v6.9-rc1 +References: git-fixes + +Upon rare occasions, KASAN reports a use-after-free Write +in srpt_refresh_port(). + +This seems to be because an event handler is registered before the +srpt device is fully setup and a race condition upon error may leave a +partially setup event handler in place. + +Instead, only register the event handler after srpt device initialization +is complete. 
+ +Fixes: a42d985bd5b2 ("ib_srpt: Initial SRP Target merge for v3.3-rc1") +Signed-off-by: William Kucharski +Link: https://lore.kernel.org/r/20240202091549.991784-2-william.kucharski@oracle.com +Reviewed-by: Bart Van Assche +Signed-off-by: Leon Romanovsky +Acked-by: Nicolas Morey +--- + drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c +index 58f70cfec45a..d35f021f154b 100644 +--- a/drivers/infiniband/ulp/srpt/ib_srpt.c ++++ b/drivers/infiniband/ulp/srpt/ib_srpt.c +@@ -3204,7 +3204,6 @@ static int srpt_add_one(struct ib_device *device) + + INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, + srpt_event_handler); +- ib_register_event_handler(&sdev->event_handler); + + for (i = 1; i <= sdev->device->phys_port_cnt; i++) { + sport = &sdev->port[i - 1]; +@@ -3227,6 +3226,7 @@ static int srpt_add_one(struct ib_device *device) + } + } + ++ ib_register_event_handler(&sdev->event_handler); + spin_lock(&srpt_dev_lock); + list_add_tail(&sdev->list, &srpt_dev_list); + spin_unlock(&srpt_dev_lock); +@@ -3237,7 +3237,6 @@ static int srpt_add_one(struct ib_device *device) + + err_port: + srpt_unregister_mad_agent(sdev, i); +- ib_unregister_event_handler(&sdev->event_handler); + err_cm: + if (sdev->cm_id) + ib_destroy_cm_id(sdev->cm_id); +-- +2.39.1.1.gbe015eda0162 + diff --git a/series.conf b/series.conf index 0879ea2..76371c3 100644 --- a/series.conf +++ b/series.conf @@ -45701,6 +45701,11 @@ patches.suse/NFSv4.2-fix-nfs4_listxattr-kernel-BUG-at-mm-usercopy.patch patches.suse/NFSv4.2-fix-listxattr-maximum-XDR-buffer-size.patch patches.suse/NFS-Fix-an-off-by-one-in-root_nfs_cat.patch + patches.suse/RDMA-irdma-Remove-duplicate-assignment.patch + patches.suse/RDMA-srpt-Do-not-register-event-handler-until-srpt-d.patch + patches.suse/RDMA-hns-Fix-mis-modifying-default-congestion-contro.patch + 
patches.suse/RDMA-device-Fix-a-race-between-mad_client-and-cm_cli.patch + patches.suse/RDMA-rtrs-clt-Check-strnlen-return-len-in-sysfs-mpat.patch # jejb/scsi for-next patches.suse/scsi-lpfc-Initialize-status-local-variable-in-lpfc_s.patch