From b90f16038aa43bb45607743505637170ee33572d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Oct 05 2021 15:25:53 +0000 Subject: Merge branch 'SLE12-SP5' (d50b9e8392e0) into 'SLE12-SP5-RT' - Modified -rt config. config/x86_64/rt_debug --- diff --git a/blacklist.conf b/blacklist.conf index bca4b5a..ab03a3b 100644 --- a/blacklist.conf +++ b/blacklist.conf @@ -1978,3 +1978,13 @@ c60b93cd4862d108214a14e655358ea714d7a12a # Duplicate of b7eeb2b4132ccf1a7d38f434 00cb645fd7e29bdd20967cd20fa8f77bcdf422f9 # Duplicate of 6fdb335f1c9c0845b50625de1624d8445c4c4a07: drm/i915/dsi: Use unconditional msleep for the panel_on_delay when there is no reset-deassert MIPI-sequence c499696e7901bda18385ac723b7bd27c3a4af624 # net: dsa: b53: Stop using dev->cpu_port incorrectly - breaks kABI 40013ff20b1beed31184935fc0aea6a859d4d4ef # net: dsa: Fix functional dsa-loop dependency on FIXED_PHY - doesn't apply +5297cfa6bdf93e3889f78f9b482e2a595a376083 # CONFIG_EDAC_SYNOPSYS not enabled +211f323768a25b30c106fd38f15a0f62c7c2b5f4 # cosmetic fix +d563131ef23cbc756026f839a82598c8445bc45f # misattributed. Introduced in d26a9559403c7c3ec3b430f5825bc22c3d40abdb in v4.14, which we don't have +9b8d7072d6552ee5c57e5765f211f267041f9557 # feature, not a bug fix +b61156fba74f659d0bc2de8f2dbf5bad9f4b8faf # cosmetic fix +a0761a301746ec2d92d7fcb82af69c0a6a4339aa # kABI and quite intrusive +b16798f5b907733966fd1a558fca823b3c67e4a1 # prerequisite breaks kABI +f8914a14623a79b73f72b2b1ee4cd9b2cb91b735 # numerous prerequisites that break kABI +a7fb107b7d8982ac76c958a0d2838a151b03e97e # net: phy: Re-parent menus for MDIO bus drivers correctly Cosmetic fix +51e1bb9eeaf7868db56e58f47848e364ab4c4129 # would impact KABI (lockdown_reason) diff --git a/config/x86_64/rt b/config/x86_64/rt index f3f28fc..eeae914 100644 --- a/config/x86_64/rt +++ b/config/x86_64/rt @@ -2695,6 +2695,8 @@ CONFIG_ICE=m CONFIG_FM10K=m CONFIG_IGC=m CONFIG_NET_VENDOR_I825XX=y +CONFIG_NET_VENDOR_MICROSOFT=y +CONFIG_MICROSOFT_MANA=m CONFIG_JME=m CONFIG_NET_VENDOR_MARVELL=y # CONFIG_MVMDIO is not set diff --git a/config/x86_64/rt_debug b/config/x86_64/rt_debug index c5d1ee3..5fe5d1e 100644 --- a/config/x86_64/rt_debug +++ b/config/x86_64/rt_debug @@ -2712,6 +2712,8 @@ CONFIG_ICE=m CONFIG_FM10K=m CONFIG_IGC=m CONFIG_NET_VENDOR_I825XX=y +CONFIG_NET_VENDOR_MICROSOFT=y +CONFIG_MICROSOFT_MANA=m CONFIG_JME=m CONFIG_NET_VENDOR_MARVELL=y # CONFIG_MVMDIO is not set diff --git a/kabi/severities b/kabi/severities index b22543b..541039c 100644 --- a/kabi/severities +++ b/kabi/severities @@ -130,3 +130,5 @@ drivers/net/ethernet/chelsio/libcxgb/* PASS # IOMMU related modules and symbols get_dev_data PASS +# ath9k local symbols +drivers/net/wireless/ath/ath9k/* PASS diff --git a/patches.kabi/ath_key_delete-kABI-fix.patch b/patches.kabi/ath_key_delete-kABI-fix.patch new file mode 100644 index 0000000..e6eee85 --- /dev/null +++ b/patches.kabi/ath_key_delete-kABI-fix.patch @@ -0,0 +1,53 @@ +From: Takashi Iwai +Subject: kABI compatibility for ath_key_delete() changes +Patch-mainline: Never, kABI compatibility +References: CVE-2020-3702 bsc#1191193 + +patches.suse/ath-Modify-ath_key_delete-to-not-need-full-key-entry.patch +broke kABI due to the exported ath_key_delete() function change. +This patch papers over it by re-defining the old function. 
+ +Signed-off-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath.h | 4 +++- + drivers/net/wireless/ath/key.c | 10 +++++++++- + 2 files changed, 12 insertions(+), 2 deletions(-) + +--- a/drivers/net/wireless/ath/ath.h ++++ b/drivers/net/wireless/ath/ath.h +@@ -197,7 +197,9 @@ struct sk_buff *ath_rxbuf_alloc(struct a + bool ath_is_mybeacon(struct ath_common *common, struct ieee80211_hdr *hdr); + + void ath_hw_setbssidmask(struct ath_common *common); +-void ath_key_delete(struct ath_common *common, u8 hw_key_idx); ++/* XXX renamed for kABI compatibility */ ++void __ath_key_delete(struct ath_common *common, u8 hw_key_idx); ++#define ath_key_delete __ath_key_delete + int ath_key_config(struct ath_common *common, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, +--- a/drivers/net/wireless/ath/key.c ++++ b/drivers/net/wireless/ath/key.c +@@ -581,7 +581,7 @@ EXPORT_SYMBOL(ath_key_config); + /* + * Delete Key. + */ +-void ath_key_delete(struct ath_common *common, u8 hw_key_idx) ++void __ath_key_delete(struct ath_common *common, u8 hw_key_idx) + { + /* Leave CCMP and TKIP (main key) configured to avoid disabling + * encryption for potentially pending frames already in a TXQ with the +@@ -615,4 +615,12 @@ void ath_key_delete(struct ath_common *c + clear_bit(hw_key_idx + 64 + 32, common->tkip_keymap); + } + } ++EXPORT_SYMBOL(__ath_key_delete); ++ ++/* XXX kABI compatibility */ ++#undef ath_key_delete ++void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key) ++{ ++ __ath_key_delete(common, key->hw_key_idx); ++} + EXPORT_SYMBOL(ath_key_delete); diff --git a/patches.suse/Bluetooth-check-for-zapped-sk-before-connecting.patch b/patches.suse/Bluetooth-check-for-zapped-sk-before-connecting.patch new file mode 100644 index 0000000..9896bbc --- /dev/null +++ b/patches.suse/Bluetooth-check-for-zapped-sk-before-connecting.patch @@ -0,0 +1,63 @@ +From 3af70b39fa2d415dc86c370e5b24ddb9fdacbd6f Mon Sep 17 00:00:00 2001 +From: Archie Pusaka +Date: Tue, 23 Mar 2021 16:32:20 +0800 +Subject: [PATCH] Bluetooth: check for zapped sk before connecting +Git-commit: 3af70b39fa2d415dc86c370e5b24ddb9fdacbd6f +Patch-mainline: v5.13-rc1 +References: CVE-2021-3752 bsc#1190023 + +There is a possibility of receiving a zapped sock on +l2cap_sock_connect(). 
This could lead to interesting crashes, one +such case is tearing down an already tore l2cap_sock as is happened +with this call trace: + +__dump_stack lib/dump_stack.c:15 [inline] +dump_stack+0xc4/0x118 lib/dump_stack.c:56 +register_lock_class kernel/locking/lockdep.c:792 [inline] +register_lock_class+0x239/0x6f6 kernel/locking/lockdep.c:742 +__lock_acquire+0x209/0x1e27 kernel/locking/lockdep.c:3105 +lock_acquire+0x29c/0x2fb kernel/locking/lockdep.c:3599 +__raw_spin_lock_bh include/linux/spinlock_api_smp.h:137 [inline] +_raw_spin_lock_bh+0x38/0x47 kernel/locking/spinlock.c:175 +spin_lock_bh include/linux/spinlock.h:307 [inline] +lock_sock_nested+0x44/0xfa net/core/sock.c:2518 +l2cap_sock_teardown_cb+0x88/0x2fb net/bluetooth/l2cap_sock.c:1345 +l2cap_chan_del+0xa3/0x383 net/bluetooth/l2cap_core.c:598 +l2cap_chan_close+0x537/0x5dd net/bluetooth/l2cap_core.c:756 +l2cap_chan_timeout+0x104/0x17e net/bluetooth/l2cap_core.c:429 +process_one_work+0x7e3/0xcb0 kernel/workqueue.c:2064 +worker_thread+0x5a5/0x773 kernel/workqueue.c:2196 +kthread+0x291/0x2a6 kernel/kthread.c:211 +ret_from_fork+0x4e/0x80 arch/x86/entry/entry_64.S:604 + +Signed-off-by: Archie Pusaka +Reported-by: syzbot+abfc0f5e668d4099af73@syzkaller.appspotmail.com +Reviewed-by: Alain Michaud +Reviewed-by: Abhishek Pandit-Subedi +Reviewed-by: Guenter Roeck +Signed-off-by: Marcel Holtmann +Acked-by: Cho, Yu-Chen +--- + net/bluetooth/l2cap_sock.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/net/bluetooth/l2cap_sock.c ++++ b/net/bluetooth/l2cap_sock.c +@@ -178,9 +178,17 @@ static int l2cap_sock_connect(struct soc + struct l2cap_chan *chan = l2cap_pi(sk)->chan; + struct sockaddr_l2 la; + int len, err = 0; ++ bool zapped; + + BT_DBG("sk %p", sk); + ++ lock_sock(sk); ++ zapped = sock_flag(sk, SOCK_ZAPPED); ++ release_sock(sk); ++ ++ if (zapped) ++ return -EINVAL; ++ + if (!addr || alen < sizeof(addr->sa_family) || + addr->sa_family != AF_BLUETOOTH) + return -EINVAL; diff --git a/patches.suse/SUNRPC-Ensure-to-ratelimit-the-server-not-responding.patch b/patches.suse/SUNRPC-Ensure-to-ratelimit-the-server-not-responding.patch new file mode 100644 index 0000000..0c8b4ca --- /dev/null +++ b/patches.suse/SUNRPC-Ensure-to-ratelimit-the-server-not-responding.patch @@ -0,0 +1,54 @@ +From: Trond Myklebust +Date: Sun, 7 Apr 2019 13:58:57 -0400 +Subject: [PATCH] SUNRPC: Ensure to ratelimit the "server not responding" + syslog messages +Git-commit: 0729d995f2a2726598642d552ebe916b43aef73d +Patch-mainline: v5.2 +References: bsc#1191136 + +In particular, the timeout messages can be very noisy, so we ought to +ratelimit them in order to avoid spamming the syslog. 
+ +Signed-off-by: Trond Myklebust +Signed-off-by: Anna Schumaker +Acked-by: NeilBrown + +--- + net/sunrpc/clnt.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -2245,7 +2245,8 @@ call_timeout(struct rpc_task *task) + } + if (RPC_IS_SOFT(task)) { + if (clnt->cl_chatty) { +- printk(KERN_NOTICE "%s: server %s not responding, timed out\n", ++ pr_notice_ratelimited( ++ "%s: server %s not responding, timed out\n", + clnt->cl_program->name, + task->tk_xprt->servername); + } +@@ -2259,9 +2260,10 @@ call_timeout(struct rpc_task *task) + if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) { + task->tk_flags |= RPC_CALL_MAJORSEEN; + if (clnt->cl_chatty) { +- printk(KERN_NOTICE "%s: server %s not responding, still trying\n", +- clnt->cl_program->name, +- task->tk_xprt->servername); ++ pr_notice_ratelimited( ++ "%s: server %s not responding, still trying\n", ++ clnt->cl_program->name, ++ task->tk_xprt->servername); + } + } + rpc_force_rebind(clnt); +@@ -2291,7 +2293,7 @@ call_decode(struct rpc_task *task) + + if (task->tk_flags & RPC_CALL_MAJORSEEN) { + if (clnt->cl_chatty) { +- printk(KERN_NOTICE "%s: server %s OK\n", ++ pr_notice_ratelimited("%s: server %s OK\n", + clnt->cl_program->name, + task->tk_xprt->servername); + } diff --git a/patches.suse/USB-serial-option-remove-duplicate-USB-device-ID.patch b/patches.suse/USB-serial-option-remove-duplicate-USB-device-ID.patch new file mode 100644 index 0000000..231c93e --- /dev/null +++ b/patches.suse/USB-serial-option-remove-duplicate-USB-device-ID.patch @@ -0,0 +1,34 @@ +From 1ca200a8c6f079950a04ea3c3380fe8cf78e95a2 Mon Sep 17 00:00:00 2001 +From: Krzysztof Kozlowski +Date: Fri, 17 Sep 2021 11:18:48 +0200 +Subject: [PATCH] USB: serial: option: remove duplicate USB device ID +Git-commit: 1ca200a8c6f079950a04ea3c3380fe8cf78e95a2 +References: git-fixes +Patch-mainline: v5.15-rc3 + +The device ZTE 0x0094 is already on the list. 
+ +Signed-off-by: Krzysztof Kozlowski +Fixes: b9e44fe5ecda ("USB: option: cleanup zte 3g-dongle's pid in option.c") +Cc: stable@vger.kernel.org +Signed-off-by: Johan Hovold +Signed-off-by: Oliver Neukum +--- + drivers/usb/serial/option.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index a79f51e35115..02a35f26ee82 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -1658,7 +1658,6 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0060, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0070, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0073, 0xff, 0xff, 0xff) }, +- { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0094, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0130, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0133, 0xff, 0xff, 0xff), +-- +2.26.2 + diff --git a/patches.suse/ath-Export-ath_hw_keysetmac.patch b/patches.suse/ath-Export-ath_hw_keysetmac.patch new file mode 100644 index 0000000..c0e985e --- /dev/null +++ b/patches.suse/ath-Export-ath_hw_keysetmac.patch @@ -0,0 +1,57 @@ +From d2d3e36498dd8e0c83ea99861fac5cf9e8671226 Mon Sep 17 00:00:00 2001 +From: Jouni Malinen +Date: Mon, 14 Dec 2020 19:21:16 +0200 +Subject: [PATCH] ath: Export ath_hw_keysetmac() +Git-commit: d2d3e36498dd8e0c83ea99861fac5cf9e8671226 +Patch-mainline: v5.12-rc1 +References: CVE-2020-3702 bsc#1191193 + +ath9k is going to use this for safer management of key cache entries. + +Signed-off-by: Jouni Malinen +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20201214172118.18100-4-jouni@codeaurora.org +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath.h | 1 + + drivers/net/wireless/ath/key.c | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h +index 7a364eca46d6..9d18105c449f 100644 +--- a/drivers/net/wireless/ath/ath.h ++++ b/drivers/net/wireless/ath/ath.h +@@ -203,6 +203,7 @@ int ath_key_config(struct ath_common *common, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key); + bool ath_hw_keyreset(struct ath_common *common, u16 entry); ++bool ath_hw_keysetmac(struct ath_common *common, u16 entry, const u8 *mac); + void ath_hw_cycle_counters_update(struct ath_common *common); + int32_t ath_hw_get_listen_time(struct ath_common *common); + +diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c +index 59618bb41f6c..cb266cf3c77c 100644 +--- a/drivers/net/wireless/ath/key.c ++++ b/drivers/net/wireless/ath/key.c +@@ -84,8 +84,7 @@ bool ath_hw_keyreset(struct ath_common *common, u16 entry) + } + EXPORT_SYMBOL(ath_hw_keyreset); + +-static bool ath_hw_keysetmac(struct ath_common *common, +- u16 entry, const u8 *mac) ++bool ath_hw_keysetmac(struct ath_common *common, u16 entry, const u8 *mac) + { + u32 macHi, macLo; + u32 unicast_flag = AR_KEYTABLE_VALID; +@@ -125,6 +124,7 @@ static bool ath_hw_keysetmac(struct ath_common *common, + + return true; + } ++EXPORT_SYMBOL(ath_hw_keysetmac); + + static bool ath_hw_set_keycache_entry(struct ath_common *common, u16 entry, + const struct ath_keyval *k, +-- +2.26.2 + diff --git a/patches.suse/ath-Modify-ath_key_delete-to-not-need-full-key-entry.patch b/patches.suse/ath-Modify-ath_key_delete-to-not-need-full-key-entry.patch new file mode 100644 index 0000000..ef3d724 --- 
/dev/null +++ b/patches.suse/ath-Modify-ath_key_delete-to-not-need-full-key-entry.patch @@ -0,0 +1,156 @@ +From 144cd24dbc36650a51f7fe3bf1424a1432f1f480 Mon Sep 17 00:00:00 2001 +From: Jouni Malinen +Date: Mon, 14 Dec 2020 19:21:17 +0200 +Subject: [PATCH] ath: Modify ath_key_delete() to not need full key entry +Git-commit: 144cd24dbc36650a51f7fe3bf1424a1432f1f480 +Patch-mainline: v5.12-rc1 +References: CVE-2020-3702 bsc#1191193 + +tkip_keymap can be used internally to avoid the reference to key->cipher +and with this, only the key index value itself is needed. This allows +ath_key_delete() call to be postponed to be handled after the upper +layer STA and key entry have already been removed. This is needed to +make ath9k key cache management safer. + +Signed-off-by: Jouni Malinen +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20201214172118.18100-5-jouni@codeaurora.org +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath.h | 2 +- + drivers/net/wireless/ath/ath5k/mac80211-ops.c | 2 +- + drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- + drivers/net/wireless/ath/ath9k/main.c | 5 ++- + drivers/net/wireless/ath/key.c | 34 +++++++++---------- + 5 files changed, 22 insertions(+), 23 deletions(-) + +diff --git a/drivers/net/wireless/ath/ath.h b/drivers/net/wireless/ath/ath.h +index 9d18105c449f..f083fb9038c3 100644 +--- a/drivers/net/wireless/ath/ath.h ++++ b/drivers/net/wireless/ath/ath.h +@@ -197,7 +197,7 @@ struct sk_buff *ath_rxbuf_alloc(struct ath_common *common, + bool ath_is_mybeacon(struct ath_common *common, struct ieee80211_hdr *hdr); + + void ath_hw_setbssidmask(struct ath_common *common); +-void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key); ++void ath_key_delete(struct ath_common *common, u8 hw_key_idx); + int ath_key_config(struct ath_common *common, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, +diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c +index 8f2719ff463c..532eeac9e83e 100644 +--- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c ++++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c +@@ -522,7 +522,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, + } + break; + case DISABLE_KEY: +- ath_key_delete(common, key); ++ ath_key_delete(common, key->hw_key_idx); + break; + default: + ret = -EINVAL; +diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c +index 2b7832b1c800..72ef319feeda 100644 +--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c ++++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c +@@ -1461,7 +1461,7 @@ static int ath9k_htc_set_key(struct ieee80211_hw *hw, + } + break; + case DISABLE_KEY: +- ath_key_delete(common, key); ++ ath_key_delete(common, key->hw_key_idx); + break; + default: + ret = -EINVAL; +diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c +index 10b87aa1d289..bcdf150060f2 100644 +--- a/drivers/net/wireless/ath/ath9k/main.c ++++ b/drivers/net/wireless/ath/ath9k/main.c +@@ -1543,12 +1543,11 @@ static void ath9k_del_ps_key(struct ath_softc *sc, + { + struct ath_common *common = ath9k_hw_common(sc->sc_ah); + struct ath_node *an = (struct ath_node *) sta->drv_priv; +- struct ieee80211_key_conf ps_key = { .hw_key_idx = an->ps_key }; + + if (!an->ps_key) + return; + +- ath_key_delete(common, &ps_key); ++ ath_key_delete(common, an->ps_key); + an->ps_key = 0; + an->key_idx[0] = 0; + } +@@ -1748,7 +1747,7 @@ static int ath9k_set_key(struct 
ieee80211_hw *hw, + } + break; + case DISABLE_KEY: +- ath_key_delete(common, key); ++ ath_key_delete(common, key->hw_key_idx); + if (an) { + for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) { + if (an->key_idx[i] != key->hw_key_idx) +diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c +index cb266cf3c77c..61b59a804e30 100644 +--- a/drivers/net/wireless/ath/key.c ++++ b/drivers/net/wireless/ath/key.c +@@ -581,38 +581,38 @@ EXPORT_SYMBOL(ath_key_config); + /* + * Delete Key. + */ +-void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key) ++void ath_key_delete(struct ath_common *common, u8 hw_key_idx) + { + /* Leave CCMP and TKIP (main key) configured to avoid disabling + * encryption for potentially pending frames already in a TXQ with the + * keyix pointing to this key entry. Instead, only clear the MAC address + * to prevent RX processing from using this key cache entry. + */ +- if (test_bit(key->hw_key_idx, common->ccmp_keymap) || +- test_bit(key->hw_key_idx, common->tkip_keymap)) +- ath_hw_keysetmac(common, key->hw_key_idx, NULL); ++ if (test_bit(hw_key_idx, common->ccmp_keymap) || ++ test_bit(hw_key_idx, common->tkip_keymap)) ++ ath_hw_keysetmac(common, hw_key_idx, NULL); + else +- ath_hw_keyreset(common, key->hw_key_idx); +- if (key->hw_key_idx < IEEE80211_WEP_NKID) ++ ath_hw_keyreset(common, hw_key_idx); ++ if (hw_key_idx < IEEE80211_WEP_NKID) + return; + +- clear_bit(key->hw_key_idx, common->keymap); +- clear_bit(key->hw_key_idx, common->ccmp_keymap); +- if (key->cipher != WLAN_CIPHER_SUITE_TKIP) ++ clear_bit(hw_key_idx, common->keymap); ++ clear_bit(hw_key_idx, common->ccmp_keymap); ++ if (!test_bit(hw_key_idx, common->tkip_keymap)) + return; + +- clear_bit(key->hw_key_idx + 64, common->keymap); ++ clear_bit(hw_key_idx + 64, common->keymap); + +- clear_bit(key->hw_key_idx, common->tkip_keymap); +- clear_bit(key->hw_key_idx + 64, common->tkip_keymap); ++ clear_bit(hw_key_idx, common->tkip_keymap); ++ clear_bit(hw_key_idx + 64, common->tkip_keymap); + + if (!(common->crypt_caps & ATH_CRYPT_CAP_MIC_COMBINED)) { +- ath_hw_keyreset(common, key->hw_key_idx + 32); +- clear_bit(key->hw_key_idx + 32, common->keymap); +- clear_bit(key->hw_key_idx + 64 + 32, common->keymap); ++ ath_hw_keyreset(common, hw_key_idx + 32); ++ clear_bit(hw_key_idx + 32, common->keymap); ++ clear_bit(hw_key_idx + 64 + 32, common->keymap); + +- clear_bit(key->hw_key_idx + 32, common->tkip_keymap); +- clear_bit(key->hw_key_idx + 64 + 32, common->tkip_keymap); ++ clear_bit(hw_key_idx + 32, common->tkip_keymap); ++ clear_bit(hw_key_idx + 64 + 32, common->tkip_keymap); + } + } + EXPORT_SYMBOL(ath_key_delete); +-- +2.26.2 + diff --git a/patches.suse/ath-Use-safer-key-clearing-with-key-cache-entries.patch b/patches.suse/ath-Use-safer-key-clearing-with-key-cache-entries.patch new file mode 100644 index 0000000..60e997c --- /dev/null +++ b/patches.suse/ath-Use-safer-key-clearing-with-key-cache-entries.patch @@ -0,0 +1,57 @@ +From 56c5485c9e444c2e85e11694b6c44f1338fc20fd Mon Sep 17 00:00:00 2001 +From: Jouni Malinen +Date: Mon, 14 Dec 2020 19:21:14 +0200 +Subject: [PATCH] ath: Use safer key clearing with key cache entries +Git-commit: 56c5485c9e444c2e85e11694b6c44f1338fc20fd +Patch-mainline: v5.12-rc1 +References: CVE-2020-3702 bsc#1191193 + +It is possible for there to be pending frames in TXQs with a reference +to the key cache entry that is being deleted. If such a key cache entry +is cleared, those pending frame in TXQ might get transmitted without +proper encryption. 
It is safer to leave the previously used key into the +key cache in such cases. Instead, only clear the MAC address to prevent +RX processing from using this key cache entry. + +This is needed in particularly in AP mode where the TXQs cannot be +flushed on station disconnection. This change alone may not be able to +address all cases where the key cache entry might get reused for other +purposes immediately (the key cache entry should be released for reuse +only once the TXQs do not have any remaining references to them), but +this makes it less likely to get unprotected frames and the more +complete changes may end up being significantly more complex. + +Signed-off-by: Jouni Malinen +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20201214172118.18100-2-jouni@codeaurora.org +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/key.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/ath/key.c b/drivers/net/wireless/ath/key.c +index 1816b4e7dc26..59618bb41f6c 100644 +--- a/drivers/net/wireless/ath/key.c ++++ b/drivers/net/wireless/ath/key.c +@@ -583,7 +583,16 @@ EXPORT_SYMBOL(ath_key_config); + */ + void ath_key_delete(struct ath_common *common, struct ieee80211_key_conf *key) + { +- ath_hw_keyreset(common, key->hw_key_idx); ++ /* Leave CCMP and TKIP (main key) configured to avoid disabling ++ * encryption for potentially pending frames already in a TXQ with the ++ * keyix pointing to this key entry. Instead, only clear the MAC address ++ * to prevent RX processing from using this key cache entry. ++ */ ++ if (test_bit(key->hw_key_idx, common->ccmp_keymap) || ++ test_bit(key->hw_key_idx, common->tkip_keymap)) ++ ath_hw_keysetmac(common, key->hw_key_idx, NULL); ++ else ++ ath_hw_keyreset(common, key->hw_key_idx); + if (key->hw_key_idx < IEEE80211_WEP_NKID) + return; + +-- +2.26.2 + diff --git a/patches.suse/ath9k-Clear-key-cache-explicitly-on-disabling-hardwa.patch b/patches.suse/ath9k-Clear-key-cache-explicitly-on-disabling-hardwa.patch new file mode 100644 index 0000000..cc26815 --- /dev/null +++ b/patches.suse/ath9k-Clear-key-cache-explicitly-on-disabling-hardwa.patch @@ -0,0 +1,40 @@ +From 73488cb2fa3bb1ef9f6cf0d757f76958bd4deaca Mon Sep 17 00:00:00 2001 +From: Jouni Malinen +Date: Mon, 14 Dec 2020 19:21:15 +0200 +Subject: [PATCH] ath9k: Clear key cache explicitly on disabling hardware +Git-commit: 73488cb2fa3bb1ef9f6cf0d757f76958bd4deaca +Patch-mainline: v5.12-rc1 +References: CVE-2020-3702 bsc#1191193 + +Now that ath/key.c may not be explicitly clearing keys from the key +cache, clear all key cache entries when disabling hardware to make sure +no keys are left behind beyond this point. + +Signed-off-by: Jouni Malinen +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20201214172118.18100-3-jouni@codeaurora.org +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath9k/main.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c +index caebe3fd6869..10b87aa1d289 100644 +--- a/drivers/net/wireless/ath/ath9k/main.c ++++ b/drivers/net/wireless/ath/ath9k/main.c +@@ -894,6 +894,11 @@ static void ath9k_stop(struct ieee80211_hw *hw) + + spin_unlock_bh(&sc->sc_pcu_lock); + ++ /* Clear key cache entries explicitly to get rid of any potentially ++ * remaining keys. 
++ */ ++ ath9k_cmn_init_crypto(sc->sc_ah); ++ + ath9k_ps_restore(sc); + + sc->ps_idle = prev_idle; +-- +2.26.2 + diff --git a/patches.suse/ath9k-Postpone-key-cache-entry-deletion-for-TXQ-fram.patch b/patches.suse/ath9k-Postpone-key-cache-entry-deletion-for-TXQ-fram.patch new file mode 100644 index 0000000..016c997 --- /dev/null +++ b/patches.suse/ath9k-Postpone-key-cache-entry-deletion-for-TXQ-fram.patch @@ -0,0 +1,168 @@ +From ca2848022c12789685d3fab3227df02b863f9696 Mon Sep 17 00:00:00 2001 +From: Jouni Malinen +Date: Mon, 14 Dec 2020 19:21:18 +0200 +Subject: [PATCH] ath9k: Postpone key cache entry deletion for TXQ frames reference it +Git-commit: ca2848022c12789685d3fab3227df02b863f9696 +Patch-mainline: v5.12-rc1 +References: CVE-2020-3702 bsc#1191193 + +Do not delete a key cache entry that is still being referenced by +pending frames in TXQs. This avoids reuse of the key cache entry while a +frame might still be transmitted using it. + +To avoid having to do any additional operations during the main TX path +operations, track pending key cache entries in a new bitmap and check +whether any pending entries can be deleted before every new key +add/remove operation. Also clear any remaining entries when stopping the +interface. + +Signed-off-by: Jouni Malinen +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20201214172118.18100-6-jouni@codeaurora.org +Acked-by: Takashi Iwai + +--- + drivers/net/wireless/ath/ath9k/hw.h | 1 + + drivers/net/wireless/ath/ath9k/main.c | 87 ++++++++++++++++++++++++++- + 2 files changed, 87 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h +index 023599e10dd5..b7b65b1c90e8 100644 +--- a/drivers/net/wireless/ath/ath9k/hw.h ++++ b/drivers/net/wireless/ath/ath9k/hw.h +@@ -820,6 +820,7 @@ struct ath_hw { + struct ath9k_pacal_info pacal_info; + struct ar5416Stats stats; + struct ath9k_tx_queue_info txq[ATH9K_NUM_TX_QUEUES]; ++ DECLARE_BITMAP(pending_del_keymap, ATH_KEYMAX); + + enum ath9k_int imask; + u32 imrs2_reg; +diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c +index bcdf150060f2..45f6402478b5 100644 +--- a/drivers/net/wireless/ath/ath9k/main.c ++++ b/drivers/net/wireless/ath/ath9k/main.c +@@ -821,12 +821,80 @@ static void ath9k_tx(struct ieee80211_hw *hw, + ieee80211_free_txskb(hw, skb); + } + ++static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) ++{ ++ struct ath_buf *bf; ++ struct ieee80211_tx_info *txinfo; ++ struct ath_frame_info *fi; ++ ++ list_for_each_entry(bf, txq_list, list) { ++ if (bf->bf_state.stale || !bf->bf_mpdu) ++ continue; ++ ++ txinfo = IEEE80211_SKB_CB(bf->bf_mpdu); ++ fi = (struct ath_frame_info *)&txinfo->rate_driver_data[0]; ++ if (fi->keyix == keyix) ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool ath9k_txq_has_key(struct ath_softc *sc, u32 keyix) ++{ ++ struct ath_hw *ah = sc->sc_ah; ++ int i; ++ struct ath_txq *txq; ++ bool key_in_use = false; ++ ++ for (i = 0; !key_in_use && i < ATH9K_NUM_TX_QUEUES; i++) { ++ if (!ATH_TXQ_SETUP(sc, i)) ++ continue; ++ txq = &sc->tx.txq[i]; ++ if (!txq->axq_depth) ++ continue; ++ if (!ath9k_hw_numtxpending(ah, txq->axq_qnum)) ++ continue; ++ ++ ath_txq_lock(sc, txq); ++ key_in_use = ath9k_txq_list_has_key(&txq->axq_q, keyix); ++ if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) { ++ int idx = txq->txq_tailidx; ++ ++ while (!key_in_use && ++ !list_empty(&txq->txq_fifo[idx])) { ++ key_in_use = ath9k_txq_list_has_key( ++ &txq->txq_fifo[idx], keyix); 
++ INCR(idx, ATH_TXFIFO_DEPTH); ++ } ++ } ++ ath_txq_unlock(sc, txq); ++ } ++ ++ return key_in_use; ++} ++ ++static void ath9k_pending_key_del(struct ath_softc *sc, u8 keyix) ++{ ++ struct ath_hw *ah = sc->sc_ah; ++ struct ath_common *common = ath9k_hw_common(ah); ++ ++ if (!test_bit(keyix, ah->pending_del_keymap) || ++ ath9k_txq_has_key(sc, keyix)) ++ return; ++ ++ /* No more TXQ frames point to this key cache entry, so delete it. */ ++ clear_bit(keyix, ah->pending_del_keymap); ++ ath_key_delete(common, keyix); ++} ++ + static void ath9k_stop(struct ieee80211_hw *hw) + { + struct ath_softc *sc = hw->priv; + struct ath_hw *ah = sc->sc_ah; + struct ath_common *common = ath9k_hw_common(ah); + bool prev_idle; ++ int i; + + ath9k_deinit_channel_context(sc); + +@@ -894,6 +962,9 @@ static void ath9k_stop(struct ieee80211_hw *hw) + + spin_unlock_bh(&sc->sc_pcu_lock); + ++ for (i = 0; i < ATH_KEYMAX; i++) ++ ath9k_pending_key_del(sc, i); ++ + /* Clear key cache entries explicitly to get rid of any potentially + * remaining keys. + */ +@@ -1718,6 +1789,12 @@ static int ath9k_set_key(struct ieee80211_hw *hw, + if (sta) + an = (struct ath_node *)sta->drv_priv; + ++ /* Delete pending key cache entries if no more frames are pointing to ++ * them in TXQs. ++ */ ++ for (i = 0; i < ATH_KEYMAX; i++) ++ ath9k_pending_key_del(sc, i); ++ + switch (cmd) { + case SET_KEY: + if (sta) +@@ -1747,7 +1824,15 @@ static int ath9k_set_key(struct ieee80211_hw *hw, + } + break; + case DISABLE_KEY: +- ath_key_delete(common, key->hw_key_idx); ++ if (ath9k_txq_has_key(sc, key->hw_key_idx)) { ++ /* Delay key cache entry deletion until there are no ++ * remaining TXQ frames pointing to this entry. ++ */ ++ set_bit(key->hw_key_idx, sc->sc_ah->pending_del_keymap); ++ ath_hw_keysetmac(common, key->hw_key_idx, NULL); ++ } else { ++ ath_key_delete(common, key->hw_key_idx); ++ } + if (an) { + for (i = 0; i < ARRAY_SIZE(an->key_idx); i++) { + if (an->key_idx[i] != key->hw_key_idx) +-- +2.26.2 + diff --git a/patches.suse/btrfs-prevent-rename2-from-exchanging-a-subvol-with-a-directory-from-different-parents.patch b/patches.suse/btrfs-prevent-rename2-from-exchanging-a-subvol-with-a-directory-from-different-parents.patch new file mode 100644 index 0000000..61002b7 --- /dev/null +++ b/patches.suse/btrfs-prevent-rename2-from-exchanging-a-subvol-with-a-directory-from-different-parents.patch @@ -0,0 +1,117 @@ +From: NeilBrown +Date: Fri, 6 Aug 2021 14:26:24 +1000 +Subject: btrfs: prevent rename2 from exchanging a subvol with a directory from + different parents +Git-commit: 3f79f6f6247c83f448c8026c3ee16d4636ef8d4f +Patch-mainline: v5.14-rc7 +References: bsc#1190626 + +Cross-rename lacks a check when that would prevent exchanging a +directory and subvolume from different parent subvolume. This causes +data inconsistencies and is caught before commit by tree-checker, +turning the filesystem to read-only. 
+ +Calling the renameat2 with RENAME_EXCHANGE flags like + + renameat2(AT_FDCWD, namesrc, AT_FDCWD, namedest, (1 << 1)) + +on two paths: + + namesrc = dir1/subvol1/dir2 + namedest = subvol2/subvol3 + +will cause key order problem with following write time tree-checker +report: + + [1194842.307890] BTRFS critical (device loop1): corrupt leaf: root=5 block=27574272 slot=10 ino=258, invalid previous key objectid, have 257 expect 258 + [1194842.322221] BTRFS info (device loop1): leaf 27574272 gen 8 total ptrs 11 free space 15444 owner 5 + [1194842.331562] BTRFS info (device loop1): refs 2 lock_owner 0 current 26561 + [1194842.338772] item 0 key (256 1 0) itemoff 16123 itemsize 160 + [1194842.338793] inode generation 3 size 16 mode 40755 + [1194842.338801] item 1 key (256 12 256) itemoff 16111 itemsize 12 + [1194842.338809] item 2 key (256 84 2248503653) itemoff 16077 itemsize 34 + [1194842.338817] dir oid 258 type 2 + [1194842.338823] item 3 key (256 84 2363071922) itemoff 16043 itemsize 34 + [1194842.338830] dir oid 257 type 2 + [1194842.338836] item 4 key (256 96 2) itemoff 16009 itemsize 34 + [1194842.338843] item 5 key (256 96 3) itemoff 15975 itemsize 34 + [1194842.338852] item 6 key (257 1 0) itemoff 15815 itemsize 160 + [1194842.338863] inode generation 6 size 8 mode 40755 + [1194842.338869] item 7 key (257 12 256) itemoff 15801 itemsize 14 + [1194842.338876] item 8 key (257 84 2505409169) itemoff 15767 itemsize 34 + [1194842.338883] dir oid 256 type 2 + [1194842.338888] item 9 key (257 96 2) itemoff 15733 itemsize 34 + [1194842.338895] item 10 key (258 12 256) itemoff 15719 itemsize 14 + [1194842.339163] BTRFS error (device loop1): block=27574272 write time tree block corruption detected + [1194842.339245] ------------[ cut here ]------------ + [1194842.443422] WARNING: CPU: 6 PID: 26561 at fs/btrfs/disk-io.c:449 csum_one_extent_buffer+0xed/0x100 [btrfs] + [1194842.511863] CPU: 6 PID: 26561 Comm: kworker/u17:2 Not tainted 5.14.0-rc3-git+ #793 + [1194842.511870] Hardware name: empty empty/S3993, BIOS PAQEX0-3 02/24/2008 + [1194842.511876] Workqueue: btrfs-worker-high btrfs_work_helper [btrfs] + [1194842.511976] RIP: 0010:csum_one_extent_buffer+0xed/0x100 [btrfs] + [1194842.512068] RSP: 0018:ffffa2c284d77da0 EFLAGS: 00010282 + [1194842.512074] RAX: 0000000000000000 RBX: 0000000000001000 RCX: ffff928867bd9978 + [1194842.512078] RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff928867bd9970 + [1194842.512081] RBP: ffff92876b958000 R08: 0000000000000001 R09: 00000000000c0003 + [1194842.512085] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 + [1194842.512088] R13: ffff92875f989f98 R14: 0000000000000000 R15: 0000000000000000 + [1194842.512092] FS: 0000000000000000(0000) GS:ffff928867a00000(0000) knlGS:0000000000000000 + [1194842.512095] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [1194842.512099] CR2: 000055f5384da1f0 CR3: 0000000102fe4000 CR4: 00000000000006e0 + [1194842.512103] Call Trace: + [1194842.512128] ? run_one_async_free+0x10/0x10 [btrfs] + [1194842.631729] btree_csum_one_bio+0x1ac/0x1d0 [btrfs] + [1194842.631837] run_one_async_start+0x18/0x30 [btrfs] + [1194842.631938] btrfs_work_helper+0xd5/0x1d0 [btrfs] + [1194842.647482] process_one_work+0x262/0x5e0 + [1194842.647520] worker_thread+0x4c/0x320 + [1194842.655935] ? process_one_work+0x5e0/0x5e0 + [1194842.655946] kthread+0x135/0x160 + [1194842.655953] ? 
set_kthread_struct+0x40/0x40 + [1194842.655965] ret_from_fork+0x1f/0x30 + [1194842.672465] irq event stamp: 1729 + [1194842.672469] hardirqs last enabled at (1735): [] console_trylock_spinning+0x185/0x1a0 + [1194842.672477] hardirqs last disabled at (1740): [] console_trylock_spinning+0x15c/0x1a0 + [1194842.672482] softirqs last enabled at (1666): [] __do_softirq+0x2e1/0x50a + [1194842.672491] softirqs last disabled at (1651): [] __irq_exit_rcu+0xa7/0xd0 + +The corrupted data will not be written, and filesystem can be unmounted +and mounted again (all changes since the last commit will be lost). + +Add the missing check for new_ino so that all non-subvolumes must reside +under the same parent subvolume. There's an exception allowing to +exchange two subvolumes from any parents as the directory representing a +subvolume is only a logical link and does not have any other structures +related to the parent subvolume, unlike files, directories etc, that +are always in the inode namespace of the parent subvolume. + +Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") +CC: stable@vger.kernel.org # 4.7+ +Reviewed-by: Nikolay Borisov +Signed-off-by: NeilBrown +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Acked-by: Nikolay Borisov +--- + fs/btrfs/inode.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9578,8 +9578,14 @@ static int btrfs_rename_exchange(struct + bool sync_log_dest = false; + bool commit_transaction = false; + +- /* we only allow rename subvolume link between subvolumes */ +- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) ++ /* ++ * For non-subvolumes allow exchange only within one subvolume, in the ++ * same inode namespace. Two subvolumes (represented as directory) can ++ * be exchanged as they're a logical link and have a fixed inode number. ++ */ ++ if (root != dest && ++ (old_ino != BTRFS_FIRST_FREE_OBJECTID || ++ new_ino != BTRFS_FIRST_FREE_OBJECTID)) + return -EXDEV; + + btrfs_init_log_ctx(&ctx_root, old_inode); diff --git a/patches.suse/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch b/patches.suse/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch new file mode 100644 index 0000000..64f98da --- /dev/null +++ b/patches.suse/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch @@ -0,0 +1,269 @@ +From: Ard Biesheuvel +Date: Thu, 31 Dec 2020 17:41:54 +0100 +Subject: crypto: x86/aes-ni-xts - use direct calls to and 4-way stride +Git-commit: 86ad60a65f29dd862a11c22bb4b5be28d6c5cef1 +Patch-mainline: v5.12-rc1 +References: bsc#1114648 + +The XTS asm helper arrangement is a bit odd: the 8-way stride helper +consists of back-to-back calls to the 4-way core transforms, which +are called indirectly, based on a boolean that indicates whether we +are performing encryption or decryption. + +Given how costly indirect calls are on x86, let's switch to direct +calls, and given how the 8-way stride doesn't really add anything +substantial, use a 4-way stride instead, and make the asm core +routine deal with any multiple of 4 blocks. Since 512 byte sectors +or 4 KB blocks are the typical quantities XTS operates on, increase +the stride exported to the glue helper to 512 bytes as well. 
+ +As a result, the number of indirect calls is reduced from 3 per 64 bytes +of in/output to 1 per 512 bytes of in/output, which produces a 65% speedup +when operating on 1 KB blocks (measured on a Intel(R) Core(TM) i7-8650U CPU) + +Fixes: 9697fa39efd3f ("x86/retpoline/crypto: Convert crypto assembler indirect jumps") +Tested-by: Eric Biggers # x86_64 +Signed-off-by: Ard Biesheuvel +Signed-off-by: Herbert Xu +Acked-by: Borislav Petkov +--- + arch/x86/crypto/aesni-intel_asm.S | 115 ++++++++++++++++++++++--------------- + arch/x86/crypto/aesni-intel_glue.c | 25 ++++---- + 2 files changed, 84 insertions(+), 56 deletions(-) + +--- a/arch/x86/crypto/aesni-intel_asm.S ++++ b/arch/x86/crypto/aesni-intel_asm.S +@@ -2714,25 +2714,18 @@ ENDPROC(aesni_ctr_enc) + pxor CTR, IV; + + /* +- * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, +- * bool enc, u8 *iv) ++ * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, ++ * const u8 *src, unsigned int len, le128 *iv) + */ +-ENTRY(aesni_xts_crypt8) ++ENTRY(aesni_xts_encrypt) + FRAME_BEGIN +- cmpb $0, %cl +- movl $0, %ecx +- movl $240, %r10d +- leaq _aesni_enc4, %r11 +- leaq _aesni_dec4, %rax +- cmovel %r10d, %ecx +- cmoveq %rax, %r11 + + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK + movups (IVP), IV + + mov 480(KEYP), KLEN +- addq %rcx, KEYP + ++.Lxts_enc_loop4: + movdqa IV, STATE1 + movdqu 0x00(INP), INC + pxor INC, STATE1 +@@ -2756,71 +2749,103 @@ ENTRY(aesni_xts_crypt8) + pxor INC, STATE4 + movdqu IV, 0x30(OUTP) + +- CALL_NOSPEC %r11 ++ call _aesni_enc4 + + movdqu 0x00(OUTP), INC + pxor INC, STATE1 + movdqu STATE1, 0x00(OUTP) + +- _aesni_gf128mul_x_ble() +- movdqa IV, STATE1 +- movdqu 0x40(INP), INC +- pxor INC, STATE1 +- movdqu IV, 0x40(OUTP) +- + movdqu 0x10(OUTP), INC + pxor INC, STATE2 + movdqu STATE2, 0x10(OUTP) + +- _aesni_gf128mul_x_ble() +- movdqa IV, STATE2 +- movdqu 0x50(INP), INC +- pxor INC, STATE2 +- movdqu IV, 0x50(OUTP) +- + movdqu 0x20(OUTP), INC + pxor INC, STATE3 + movdqu STATE3, 0x20(OUTP) + +- _aesni_gf128mul_x_ble() +- movdqa IV, STATE3 +- movdqu 0x60(INP), INC +- pxor INC, STATE3 +- movdqu IV, 0x60(OUTP) +- + movdqu 0x30(OUTP), INC + pxor INC, STATE4 + movdqu STATE4, 0x30(OUTP) + + _aesni_gf128mul_x_ble() +- movdqa IV, STATE4 +- movdqu 0x70(INP), INC +- pxor INC, STATE4 +- movdqu IV, 0x70(OUTP) + +- _aesni_gf128mul_x_ble() ++ add $64, INP ++ add $64, OUTP ++ sub $64, LEN ++ ja .Lxts_enc_loop4 ++ + movups IV, (IVP) + +- CALL_NOSPEC %r11 ++ FRAME_END ++ ret ++ENDPROC(aesni_xts_encrypt) ++ ++/* ++ * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, ++ * const u8 *src, unsigned int len, le128 *iv) ++ */ ++ENTRY(aesni_xts_decrypt) ++ FRAME_BEGIN ++ ++ movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK ++ movups (IVP), IV ++ ++ mov 480(KEYP), KLEN ++ add $240, KEYP ++ ++.Lxts_dec_loop4: ++ movdqa IV, STATE1 ++ movdqu 0x00(INP), INC ++ pxor INC, STATE1 ++ movdqu IV, 0x00(OUTP) ++ ++ _aesni_gf128mul_x_ble() ++ movdqa IV, STATE2 ++ movdqu 0x10(INP), INC ++ pxor INC, STATE2 ++ movdqu IV, 0x10(OUTP) ++ ++ _aesni_gf128mul_x_ble() ++ movdqa IV, STATE3 ++ movdqu 0x20(INP), INC ++ pxor INC, STATE3 ++ movdqu IV, 0x20(OUTP) ++ ++ _aesni_gf128mul_x_ble() ++ movdqa IV, STATE4 ++ movdqu 0x30(INP), INC ++ pxor INC, STATE4 ++ movdqu IV, 0x30(OUTP) ++ ++ call _aesni_dec4 + +- movdqu 0x40(OUTP), INC ++ movdqu 0x00(OUTP), INC + pxor INC, STATE1 +- movdqu STATE1, 0x40(OUTP) ++ movdqu STATE1, 0x00(OUTP) + +- movdqu 0x50(OUTP), INC ++ movdqu 0x10(OUTP), INC + pxor INC, STATE2 +- movdqu STATE2, 0x50(OUTP) ++ 
movdqu STATE2, 0x10(OUTP) + +- movdqu 0x60(OUTP), INC ++ movdqu 0x20(OUTP), INC + pxor INC, STATE3 +- movdqu STATE3, 0x60(OUTP) ++ movdqu STATE3, 0x20(OUTP) + +- movdqu 0x70(OUTP), INC ++ movdqu 0x30(OUTP), INC + pxor INC, STATE4 +- movdqu STATE4, 0x70(OUTP) ++ movdqu STATE4, 0x30(OUTP) ++ ++ _aesni_gf128mul_x_ble() ++ ++ add $64, INP ++ add $64, OUTP ++ sub $64, LEN ++ ja .Lxts_dec_loop4 ++ ++ movups IV, (IVP) + + FRAME_END + ret +-ENDPROC(aesni_xts_crypt8) ++ENDPROC(aesni_xts_decrypt) + + #endif +--- a/arch/x86/crypto/aesni-intel_glue.c ++++ b/arch/x86/crypto/aesni-intel_glue.c +@@ -88,6 +88,12 @@ void crypto_fpu_exit(void); + #define AVX_GEN2_OPTSIZE 640 + #define AVX_GEN4_OPTSIZE 4096 + ++asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, ++ const u8 *in, unsigned int len, u8 *iv); ++ ++asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, ++ const u8 *in, unsigned int len, u8 *iv); ++ + #ifdef CONFIG_X86_64 + + static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, +@@ -95,9 +101,6 @@ static void (*aesni_ctr_enc_tfm)(struct + asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +-asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, +- const u8 *in, bool enc, u8 *iv); +- + /* asmlinkage void aesni_gcm_enc() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Ciphertext output. Encrypt in-place is allowed. +@@ -553,14 +556,14 @@ static void aesni_xts_dec(void *ctx, u12 + glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec)); + } + +-static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv) ++static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) + { +- aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv); ++ aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); + } + +-static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv) ++static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) + { +- aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv); ++ aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); + } + + static const struct common_glue_ctx aesni_enc_xts = { +@@ -568,8 +571,8 @@ static const struct common_glue_ctx aesn + .fpu_blocks_limit = 1, + + .funcs = { { +- .num_blocks = 8, +- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) } ++ .num_blocks = 32, ++ .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc32) } + }, { + .num_blocks = 1, + .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) } +@@ -581,8 +584,8 @@ static const struct common_glue_ctx aesn + .fpu_blocks_limit = 1, + + .funcs = { { +- .num_blocks = 8, +- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) } ++ .num_blocks = 32, ++ .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec32) } + }, { + .num_blocks = 1, + .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) } diff --git a/patches.suse/drm-qxl-lost-qxl_bo_kunmap_atomic_page-in-qxl_image_.patch b/patches.suse/drm-qxl-lost-qxl_bo_kunmap_atomic_page-in-qxl_image_.patch new file mode 100644 index 0000000..ebcfafa --- /dev/null +++ b/patches.suse/drm-qxl-lost-qxl_bo_kunmap_atomic_page-in-qxl_image_.patch @@ -0,0 +1,36 @@ +From 5b5703dbafae74adfbe298a56a81694172caf5e6 Mon Sep 17 00:00:00 2001 +From: Vasily Averin +Date: Wed, 29 Apr 2020 12:34:36 +0300 +Subject: [PATCH] drm/qxl: lost qxl_bo_kunmap_atomic_page in + qxl_image_init_helper() +Patch-mainline: v5.7-rc4 
+Git-commit: 5b5703dbafae74adfbe298a56a81694172caf5e6 +References: bsc#1186785 + +v2: removed TODO reminder + +Signed-off-by: Vasily Averin +Link: http://patchwork.freedesktop.org/patch/msgid/a4e0ae09-a73c-1c62-04ef-3f990d41bea9@virtuozzo.com +Signed-off-by: Gerd Hoffmann +Signed-off-by: Oscar Salvador +--- + drivers/gpu/drm/qxl/qxl_image.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/qxl/qxl_image.c b/drivers/gpu/drm/qxl/qxl_image.c +index 43688ecdd8a0..60ab7151b84d 100644 +--- a/drivers/gpu/drm/qxl/qxl_image.c ++++ b/drivers/gpu/drm/qxl/qxl_image.c +@@ -212,7 +212,8 @@ qxl_image_init_helper(struct qxl_device *qdev, + break; + default: + DRM_ERROR("unsupported image bit depth\n"); +- return -EINVAL; /* TODO: cleanup */ ++ qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr); ++ return -EINVAL; + } + image->u.bitmap.flags = QXL_BITMAP_TOP_DOWN; + image->u.bitmap.x = width; +-- +2.26.2 + diff --git a/patches.suse/fuse-truncate-pagecache-on-atomic_o_trunc.patch b/patches.suse/fuse-truncate-pagecache-on-atomic_o_trunc.patch new file mode 100644 index 0000000..c430f9f --- /dev/null +++ b/patches.suse/fuse-truncate-pagecache-on-atomic_o_trunc.patch @@ -0,0 +1,56 @@ +From: Miklos Szeredi +Date: Tue, 17 Aug 2021 21:05:16 +0200 +Subject: fuse: truncate pagecache on atomic_o_trunc +Git-commit: 76224355db7570cbe6b6f75c8929a1558828dd55 +Patch-mainline: v5.15-rc1 +References: bsc#1191051 + +fuse_finish_open() will be called with FUSE_NOWRITE in case of atomic +O_TRUNC. This can deadlock with fuse_wait_on_page_writeback() in +fuse_launder_page() triggered by invalidate_inode_pages2(). + +Fix by replacing invalidate_inode_pages2() in fuse_finish_open() with a +truncate_pagecache() call. This makes sense regardless of FOPEN_KEEP_CACHE +or fc->writeback cache, so do it unconditionally. 
+ +Reported-by: Xie Yongji +Reported-and-tested-by: syzbot+bea44a5189836d956894@syzkaller.appspotmail.com +Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC") +Cc: +Signed-off-by: Miklos Szeredi +Acked-by: Luis Henriques + +--- + fs/fuse/file.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/fs/fuse/file.c ++++ b/fs/fuse/file.c +@@ -177,10 +177,9 @@ void fuse_finish_open(struct inode *inod + + if (ff->open_flags & FOPEN_DIRECT_IO) + file->f_op = &fuse_direct_io_file_operations; +- if (!(ff->open_flags & FOPEN_KEEP_CACHE)) +- invalidate_inode_pages2(inode->i_mapping); + if (ff->open_flags & FOPEN_NONSEEKABLE) + nonseekable_open(inode, file); ++ + if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) { + struct fuse_inode *fi = get_fuse_inode(inode); + +@@ -188,10 +187,14 @@ void fuse_finish_open(struct inode *inod + fi->attr_version = ++fc->attr_version; + i_size_write(inode, 0); + spin_unlock(&fc->lock); ++ truncate_pagecache(inode, 0); + fuse_invalidate_attr(inode); + if (fc->writeback_cache) + file_update_time(file); ++ } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { ++ invalidate_inode_pages2(inode->i_mapping); + } ++ + if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) + fuse_link_write_file(file); + } + diff --git a/patches.suse/ipc-remove-memcg-accounting-for-sops-objects-in-do_semtimedop.patch b/patches.suse/ipc-remove-memcg-accounting-for-sops-objects-in-do_semtimedop.patch new file mode 100644 index 0000000..10b5a87 --- /dev/null +++ b/patches.suse/ipc-remove-memcg-accounting-for-sops-objects-in-do_semtimedop.patch @@ -0,0 +1,56 @@ +From: Vasily Averin +Date: Sat, 11 Sep 2021 10:40:08 +0300 +Subject: ipc: remove memcg accounting for sops objects in do_semtimedop() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Git-commit: 6a4746ba06191e23d30230738e94334b26590a8a +Patch-mainline: v5.15-rc2 +References: bsc#1190115 + +Linus proposes to revert an accounting for sops objects in +do_semtimedop() because it's really just a temporary buffer +for a single semtimedop() system call. + +This object can consume up to 2 pages, syscall is sleeping +one, size and duration can be controlled by user, and this +allocation can be repeated by many thread at the same time. + +However Shakeel Butt pointed that there are much more popular +objects with the same life time and similar memory +consumption, the accounting of which was decided to be +rejected for performance reasons. + +Considering at least 2 pages for task_struct and 2 pages for +the kernel stack, a back of the envelope calculation gives a +footprint amplification of <1.5 so this temporal buffer can be +safely ignored. + +The factor would IMO be interesting if it was >> 2 (from the +PoV of excessive (ab)use, fine-grained accounting seems to be +currently unfeasible due to performance impact). 
+ +Link: https://lore.kernel.org/lkml/90e254df-0dfe-f080-011e-b7c53ee7fd20@virtuozzo.com/ +Fixes: 18319498fdd4 ("memcg: enable accounting of ipc resources") +Signed-off-by: Vasily Averin +Acked-by: Michal Hocko +Reviewed-by: Michal Koutný +Acked-by: Shakeel Butt +Signed-off-by: Linus Torvalds +[mkoutny: Adjust context, use upstream kvmalloc_array helper] +Acked-by: Michal Koutný +--- + ipc/sem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/ipc/sem.c ++++ b/ipc/sem.c +@@ -1786,7 +1786,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, + if (nsops > ns->sc_semopm) + return -E2BIG; + if (nsops > SEMOPM_FAST) { +- sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL_ACCOUNT); ++ sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); + if (sops == NULL) + return -ENOMEM; + } diff --git a/patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch b/patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch deleted file mode 100644 index 3dbaf77..0000000 --- a/patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: Vasily Averin -Subject: ipc: remove memcg accounting for sops objects in do_semtimedop() -Date: Sat, 11 Sep 2021 10:40:08 +0300 -Message-ID: <90e254df-0dfe-f080-011e-b7c53ee7fd20@virtuozzo.com> -Patch-mainline: Not yet, too soon -References: bsc#1190115 - -Linus proposes to revert an accounting for sops objects in -do_semtimedop() because it's really just a temporary buffer -for a single semtimedop() system call. - -This object can consume up to 2 pages, syscall is sleeping one, -size and duration can be controlled by user, and this allocation -can be repeated by many thread at the same time. - -However Shakeel Butt pointed that there are much more popular objects -with the same life time and similar memory consumption, the accounting -of which was decided to be rejected for performance reasons. - -In addition, any usual task consumes much more accounted memory, -so 2 pages of this temporal buffer can be safely ignored. - -Link: https://patchwork.kernel.org/project/linux-fsdevel/patch/20171005222144.123797-1-shakeelb@google.com/ - -Fixes: 18319498fdd4 ("memcg: enable accounting of ipc resources") -Signed-off-by: Vasily Averin -Acked-by: Michal Koutný ---- - ipc/sem.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/ipc/sem.c b/ipc/sem.c -index f833238df1ce..6693daf4fe11 100644 ---- a/ipc/sem.c -+++ b/ipc/sem.c -@@ -2238,7 +2238,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, - if (nsops > ns->sc_semopm) - return -E2BIG; - if (nsops > SEMOPM_FAST) { -- sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL_ACCOUNT); -+ sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); - if (sops == NULL) - return -ENOMEM; - } --- -2.25.1 - diff --git a/patches.suse/msft-hv-2332-net-mana-Add-a-driver-for-Microsoft-Azure-Network-Ad.patch b/patches.suse/msft-hv-2332-net-mana-Add-a-driver-for-Microsoft-Azure-Network-Ad.patch new file mode 100644 index 0000000..6684acd --- /dev/null +++ b/patches.suse/msft-hv-2332-net-mana-Add-a-driver-for-Microsoft-Azure-Network-Ad.patch @@ -0,0 +1,6293 @@ +From: Dexuan Cui +Date: Fri, 16 Apr 2021 13:11:59 -0700 +Patch-mainline: v5.13-rc1 +Subject: net: mana: Add a driver for Microsoft Azure Network Adapter (MANA) +Git-commit: ca9c54d2d6a5ab2430c4eda364c77125d62e5e0f +References: jsc#SLE-18779, bsc#1185727 + +Add a VF driver for Microsoft Azure Network Adapter (MANA) that will be +available in the future. 
+ +Co-developed-by: Haiyang Zhang +Signed-off-by: Haiyang Zhang +Co-developed-by: Shachar Raindel +Signed-off-by: Shachar Raindel +Signed-off-by: Dexuan Cui +Reviewed-by: Stephen Hemminger +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + MAINTAINERS | 4 +- + drivers/net/ethernet/Kconfig | 1 + + drivers/net/ethernet/Makefile | 1 + + drivers/net/ethernet/microsoft/Kconfig | 29 + + drivers/net/ethernet/microsoft/Makefile | 5 + + drivers/net/ethernet/microsoft/mana/Makefile | 6 + + drivers/net/ethernet/microsoft/mana/gdma.h | 673 ++++++++ + drivers/net/ethernet/microsoft/mana/gdma_main.c | 1415 ++++++++++++++++ + drivers/net/ethernet/microsoft/mana/hw_channel.c | 843 ++++++++++ + drivers/net/ethernet/microsoft/mana/hw_channel.h | 190 +++ + drivers/net/ethernet/microsoft/mana/mana.h | 533 ++++++ + drivers/net/ethernet/microsoft/mana/mana_en.c | 1895 ++++++++++++++++++++++ + drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 250 +++ + drivers/net/ethernet/microsoft/mana/shm_channel.c | 291 ++++ + drivers/net/ethernet/microsoft/mana/shm_channel.h | 21 + + 15 files changed, 6156 insertions(+), 1 deletion(-) + create mode 100644 drivers/net/ethernet/microsoft/Kconfig + create mode 100644 drivers/net/ethernet/microsoft/Makefile + create mode 100644 drivers/net/ethernet/microsoft/mana/Makefile + create mode 100644 drivers/net/ethernet/microsoft/mana/gdma.h + create mode 100644 drivers/net/ethernet/microsoft/mana/gdma_main.c + create mode 100644 drivers/net/ethernet/microsoft/mana/hw_channel.c + create mode 100644 drivers/net/ethernet/microsoft/mana/hw_channel.h + create mode 100644 drivers/net/ethernet/microsoft/mana/mana.h + create mode 100644 drivers/net/ethernet/microsoft/mana/mana_en.c + create mode 100644 drivers/net/ethernet/microsoft/mana/mana_ethtool.c + create mode 100644 drivers/net/ethernet/microsoft/mana/shm_channel.c + create mode 100644 drivers/net/ethernet/microsoft/mana/shm_channel.h + +diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig +--- a/drivers/net/ethernet/Kconfig ++++ b/drivers/net/ethernet/Kconfig +@@ -82,6 +82,7 @@ source "drivers/net/ethernet/huawei/Kconfig" + source "drivers/net/ethernet/ibm/Kconfig" + source "drivers/net/ethernet/intel/Kconfig" + source "drivers/net/ethernet/i825xx/Kconfig" ++source "drivers/net/ethernet/microsoft/Kconfig" + source "drivers/net/ethernet/xscale/Kconfig" + + config JME +diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile +--- a/drivers/net/ethernet/Makefile ++++ b/drivers/net/ethernet/Makefile +@@ -45,6 +45,7 @@ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ + obj-$(CONFIG_NET_VENDOR_IBM) += ibm/ + obj-$(CONFIG_NET_VENDOR_INTEL) += intel/ + obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/ ++obj-$(CONFIG_NET_VENDOR_MICROSOFT) += microsoft/ + obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/ + obj-$(CONFIG_JME) += jme.o + obj-$(CONFIG_KORINA) += korina.o +diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig +new file mode 100644 +index 000000000000..e1ac0a5d808d +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/Kconfig +@@ -0,0 +1,29 @@ ++# ++# Microsoft Azure network device configuration ++# ++ ++config NET_VENDOR_MICROSOFT ++ bool "Microsoft Network Devices" ++ default y ++ help ++ If you have a network (Ethernet) device belonging to this class, say Y. ++ ++ Note that the answer to this question doesn't directly affect the ++ kernel: saying N will just cause the configurator to skip the ++ question about Microsoft network devices. 
If you say Y, you will be ++ asked for your specific device in the following question. ++ ++if NET_VENDOR_MICROSOFT ++ ++config MICROSOFT_MANA ++ tristate "Microsoft Azure Network Adapter (MANA) support" ++ depends on PCI_MSI && X86_64 ++ select PCI_HYPERV ++ help ++ This driver supports Microsoft Azure Network Adapter (MANA). ++ So far, the driver is only supported on X86_64. ++ ++ To compile this driver as a module, choose M here. ++ The module will be called mana. ++ ++endif #NET_VENDOR_MICROSOFT +diff --git a/drivers/net/ethernet/microsoft/Makefile b/drivers/net/ethernet/microsoft/Makefile +new file mode 100644 +index 000000000000..d2ddc218135f +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/Makefile +@@ -0,0 +1,5 @@ ++# ++# Makefile for the Microsoft Azure network device driver. ++# ++ ++obj-$(CONFIG_MICROSOFT_MANA) += mana/ +diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile +new file mode 100644 +index 000000000000..0edd5bb685f3 +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/Makefile +@@ -0,0 +1,6 @@ ++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++# ++# Makefile for the Microsoft Azure Network Adapter driver ++ ++obj-$(CONFIG_MICROSOFT_MANA) += mana.o ++mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o +diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h +new file mode 100644 +index 000000000000..33e53d32e891 +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -0,0 +1,673 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ ++/* Copyright (c) 2021, Microsoft Corporation. */ ++ ++#ifndef _GDMA_H ++#define _GDMA_H ++ ++#include ++#include ++ ++#include "shm_channel.h" ++ ++/* Structures labeled with "HW DATA" are exchanged with the hardware. All of ++ * them are naturally aligned and hence don't need __packed. ++ */ ++ ++enum gdma_request_type { ++ GDMA_VERIFY_VF_DRIVER_VERSION = 1, ++ GDMA_QUERY_MAX_RESOURCES = 2, ++ GDMA_LIST_DEVICES = 3, ++ GDMA_REGISTER_DEVICE = 4, ++ GDMA_DEREGISTER_DEVICE = 5, ++ GDMA_GENERATE_TEST_EQE = 10, ++ GDMA_CREATE_QUEUE = 12, ++ GDMA_DISABLE_QUEUE = 13, ++ GDMA_CREATE_DMA_REGION = 25, ++ GDMA_DMA_REGION_ADD_PAGES = 26, ++ GDMA_DESTROY_DMA_REGION = 27, ++}; ++ ++enum gdma_queue_type { ++ GDMA_INVALID_QUEUE, ++ GDMA_SQ, ++ GDMA_RQ, ++ GDMA_CQ, ++ GDMA_EQ, ++}; ++ ++enum gdma_work_request_flags { ++ GDMA_WR_NONE = 0, ++ GDMA_WR_OOB_IN_SGL = BIT(0), ++ GDMA_WR_PAD_BY_SGE0 = BIT(1), ++}; ++ ++enum gdma_eqe_type { ++ GDMA_EQE_COMPLETION = 3, ++ GDMA_EQE_TEST_EVENT = 64, ++ GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, ++ GDMA_EQE_HWC_INIT_DATA = 130, ++ GDMA_EQE_HWC_INIT_DONE = 131, ++}; ++ ++enum { ++ GDMA_DEVICE_NONE = 0, ++ GDMA_DEVICE_HWC = 1, ++ GDMA_DEVICE_MANA = 2, ++}; ++ ++struct gdma_resource { ++ /* Protect the bitmap */ ++ spinlock_t lock; ++ ++ /* The bitmap size in bits. */ ++ u32 size; ++ ++ /* The bitmap tracks the resources. 
*/ ++ unsigned long *map; ++}; ++ ++union gdma_doorbell_entry { ++ u64 as_uint64; ++ ++ struct { ++ u64 id : 24; ++ u64 reserved : 8; ++ u64 tail_ptr : 31; ++ u64 arm : 1; ++ } cq; ++ ++ struct { ++ u64 id : 24; ++ u64 wqe_cnt : 8; ++ u64 tail_ptr : 32; ++ } rq; ++ ++ struct { ++ u64 id : 24; ++ u64 reserved : 8; ++ u64 tail_ptr : 32; ++ } sq; ++ ++ struct { ++ u64 id : 16; ++ u64 reserved : 16; ++ u64 tail_ptr : 31; ++ u64 arm : 1; ++ } eq; ++}; /* HW DATA */ ++ ++struct gdma_msg_hdr { ++ u32 hdr_type; ++ u32 msg_type; ++ u16 msg_version; ++ u16 hwc_msg_id; ++ u32 msg_size; ++}; /* HW DATA */ ++ ++struct gdma_dev_id { ++ union { ++ struct { ++ u16 type; ++ u16 instance; ++ }; ++ ++ u32 as_uint32; ++ }; ++}; /* HW DATA */ ++ ++struct gdma_req_hdr { ++ struct gdma_msg_hdr req; ++ struct gdma_msg_hdr resp; /* The expected response */ ++ struct gdma_dev_id dev_id; ++ u32 activity_id; ++}; /* HW DATA */ ++ ++struct gdma_resp_hdr { ++ struct gdma_msg_hdr response; ++ struct gdma_dev_id dev_id; ++ u32 activity_id; ++ u32 status; ++ u32 reserved; ++}; /* HW DATA */ ++ ++struct gdma_general_req { ++ struct gdma_req_hdr hdr; ++}; /* HW DATA */ ++ ++#define GDMA_MESSAGE_V1 1 ++ ++struct gdma_general_resp { ++ struct gdma_resp_hdr hdr; ++}; /* HW DATA */ ++ ++#define GDMA_STANDARD_HEADER_TYPE 0 ++ ++static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code, ++ u32 req_size, u32 resp_size) ++{ ++ hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE; ++ hdr->req.msg_type = code; ++ hdr->req.msg_version = GDMA_MESSAGE_V1; ++ hdr->req.msg_size = req_size; ++ ++ hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE; ++ hdr->resp.msg_type = code; ++ hdr->resp.msg_version = GDMA_MESSAGE_V1; ++ hdr->resp.msg_size = resp_size; ++} ++ ++/* The 16-byte struct is part of the GDMA work queue entry (WQE). */ ++struct gdma_sge { ++ u64 address; ++ u32 mem_key; ++ u32 size; ++}; /* HW DATA */ ++ ++struct gdma_wqe_request { ++ struct gdma_sge *sgl; ++ u32 num_sge; ++ ++ u32 inline_oob_size; ++ const void *inline_oob_data; ++ ++ u32 flags; ++ u32 client_data_unit; ++}; ++ ++enum gdma_page_type { ++ GDMA_PAGE_TYPE_4K, ++}; ++ ++#define GDMA_INVALID_DMA_REGION 0 ++ ++struct gdma_mem_info { ++ struct device *dev; ++ ++ dma_addr_t dma_handle; ++ void *virt_addr; ++ u64 length; ++ ++ /* Allocated by the PF driver */ ++ u64 gdma_region; ++}; ++ ++#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 ++ ++struct gdma_dev { ++ struct gdma_context *gdma_context; ++ ++ struct gdma_dev_id dev_id; ++ ++ u32 pdid; ++ u32 doorbell; ++ u32 gpa_mkey; ++ ++ /* GDMA driver specific pointer */ ++ void *driver_data; ++}; ++ ++#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE ++ ++#define GDMA_CQE_SIZE 64 ++#define GDMA_EQE_SIZE 16 ++#define GDMA_MAX_SQE_SIZE 512 ++#define GDMA_MAX_RQE_SIZE 256 ++ ++#define GDMA_COMP_DATA_SIZE 0x3C ++ ++#define GDMA_EVENT_DATA_SIZE 0xC ++ ++/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. 
*/ ++#define GDMA_WQE_BU_SIZE 32 ++ ++#define INVALID_PDID UINT_MAX ++#define INVALID_DOORBELL UINT_MAX ++#define INVALID_MEM_KEY UINT_MAX ++#define INVALID_QUEUE_ID UINT_MAX ++#define INVALID_PCI_MSIX_INDEX UINT_MAX ++ ++struct gdma_comp { ++ u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; ++ u32 wq_num; ++ bool is_sq; ++}; ++ ++struct gdma_event { ++ u32 details[GDMA_EVENT_DATA_SIZE / 4]; ++ u8 type; ++}; ++ ++struct gdma_queue; ++ ++#define CQE_POLLING_BUFFER 512 ++struct mana_eq { ++ struct gdma_queue *eq; ++ struct gdma_comp cqe_poll[CQE_POLLING_BUFFER]; ++}; ++ ++typedef void gdma_eq_callback(void *context, struct gdma_queue *q, ++ struct gdma_event *e); ++ ++typedef void gdma_cq_callback(void *context, struct gdma_queue *q); ++ ++/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE ++ * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the ++ * driver increases the 'head' in BUs rather than in bytes, and notifies ++ * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track ++ * the HW head, and increases the 'head' by 1 for every processed EQE/CQE. ++ * ++ * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is ++ * processed, the driver increases the 'tail' to indicate that WQEs have ++ * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ. ++ * ++ * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures ++ * that the EQ/CQ is big enough so they can't overflow, and the driver uses ++ * the owner bits mechanism to detect if the queue has become empty. ++ */ ++struct gdma_queue { ++ struct gdma_dev *gdma_dev; ++ ++ enum gdma_queue_type type; ++ u32 id; ++ ++ struct gdma_mem_info mem_info; ++ ++ void *queue_mem_ptr; ++ u32 queue_size; ++ ++ bool monitor_avl_buf; ++ ++ u32 head; ++ u32 tail; ++ ++ /* Extra fields specific to EQ/CQ. */ ++ union { ++ struct { ++ bool disable_needed; ++ ++ gdma_eq_callback *callback; ++ void *context; ++ ++ unsigned int msix_index; ++ ++ u32 log2_throttle_limit; ++ ++ /* NAPI data */ ++ struct napi_struct napi; ++ int work_done; ++ int budget; ++ } eq; ++ ++ struct { ++ gdma_cq_callback *callback; ++ void *context; ++ ++ struct gdma_queue *parent; /* For CQ/EQ relationship */ ++ } cq; ++ }; ++}; ++ ++struct gdma_queue_spec { ++ enum gdma_queue_type type; ++ bool monitor_avl_buf; ++ unsigned int queue_size; ++ ++ /* Extra fields specific to EQ/CQ. */ ++ union { ++ struct { ++ gdma_eq_callback *callback; ++ void *context; ++ ++ unsigned long log2_throttle_limit; ++ ++ /* Only used by the MANA device. */ ++ struct net_device *ndev; ++ } eq; ++ ++ struct { ++ gdma_cq_callback *callback; ++ void *context; ++ ++ struct gdma_queue *parent_eq; ++ ++ } cq; ++ }; ++}; ++ ++struct gdma_irq_context { ++ void (*handler)(void *arg); ++ void *arg; ++}; ++ ++struct gdma_context { ++ struct device *dev; ++ ++ /* Per-vPort max number of queues */ ++ unsigned int max_num_queues; ++ unsigned int max_num_msix; ++ unsigned int num_msix_usable; ++ struct gdma_resource msix_resource; ++ struct gdma_irq_context *irq_contexts; ++ ++ /* This maps a CQ index to the queue structure. 
*/ ++ unsigned int max_num_cqs; ++ struct gdma_queue **cq_table; ++ ++ /* Protect eq_test_event and test_event_eq_id */ ++ struct mutex eq_test_event_mutex; ++ struct completion eq_test_event; ++ u32 test_event_eq_id; ++ ++ void __iomem *bar0_va; ++ void __iomem *shm_base; ++ void __iomem *db_page_base; ++ u32 db_page_size; ++ ++ /* Shared memory chanenl (used to bootstrap HWC) */ ++ struct shm_channel shm_channel; ++ ++ /* Hardware communication channel (HWC) */ ++ struct gdma_dev hwc; ++ ++ /* Azure network adapter */ ++ struct gdma_dev mana; ++}; ++ ++#define MAX_NUM_GDMA_DEVICES 4 ++ ++static inline bool mana_gd_is_mana(struct gdma_dev *gd) ++{ ++ return gd->dev_id.type == GDMA_DEVICE_MANA; ++} ++ ++static inline bool mana_gd_is_hwc(struct gdma_dev *gd) ++{ ++ return gd->dev_id.type == GDMA_DEVICE_HWC; ++} ++ ++u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset); ++u32 mana_gd_wq_avail_space(struct gdma_queue *wq); ++ ++int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq); ++ ++int mana_gd_create_hwc_queue(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ struct gdma_queue **queue_ptr); ++ ++int mana_gd_create_mana_eq(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ struct gdma_queue **queue_ptr); ++ ++int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ struct gdma_queue **queue_ptr); ++ ++void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); ++ ++int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); ++ ++void mana_gd_arm_cq(struct gdma_queue *cq); ++ ++struct gdma_wqe { ++ u32 reserved :24; ++ u32 last_vbytes :8; ++ ++ union { ++ u32 flags; ++ ++ struct { ++ u32 num_sge :8; ++ u32 inline_oob_size_div4:3; ++ u32 client_oob_in_sgl :1; ++ u32 reserved1 :4; ++ u32 client_data_unit :14; ++ u32 reserved2 :2; ++ }; ++ }; ++}; /* HW DATA */ ++ ++#define INLINE_OOB_SMALL_SIZE 8 ++#define INLINE_OOB_LARGE_SIZE 24 ++ ++#define MAX_TX_WQE_SIZE 512 ++#define MAX_RX_WQE_SIZE 256 ++ ++struct gdma_cqe { ++ u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; ++ ++ union { ++ u32 as_uint32; ++ ++ struct { ++ u32 wq_num : 24; ++ u32 is_sq : 1; ++ u32 reserved : 4; ++ u32 owner_bits : 3; ++ }; ++ } cqe_info; ++}; /* HW DATA */ ++ ++#define GDMA_CQE_OWNER_BITS 3 ++ ++#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1) ++ ++#define SET_ARM_BIT 1 ++ ++#define GDMA_EQE_OWNER_BITS 3 ++ ++union gdma_eqe_info { ++ u32 as_uint32; ++ ++ struct { ++ u32 type : 8; ++ u32 reserved1 : 8; ++ u32 client_id : 2; ++ u32 reserved2 : 11; ++ u32 owner_bits : 3; ++ }; ++}; /* HW DATA */ ++ ++#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1) ++#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries)) ++ ++struct gdma_eqe { ++ u32 details[GDMA_EVENT_DATA_SIZE / 4]; ++ u32 eqe_info; ++}; /* HW DATA */ ++ ++#define GDMA_REG_DB_PAGE_OFFSET 8 ++#define GDMA_REG_DB_PAGE_SIZE 0x10 ++#define GDMA_REG_SHM_OFFSET 0x18 ++ ++struct gdma_posted_wqe_info { ++ u32 wqe_size_in_bu; ++}; ++ ++/* GDMA_GENERATE_TEST_EQE */ ++struct gdma_generate_test_event_req { ++ struct gdma_req_hdr hdr; ++ u32 queue_index; ++}; /* HW DATA */ ++ ++/* GDMA_VERIFY_VF_DRIVER_VERSION */ ++enum { ++ GDMA_PROTOCOL_V1 = 1, ++ GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1, ++ GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, ++}; ++ ++struct gdma_verify_ver_req { ++ struct gdma_req_hdr hdr; ++ ++ /* Mandatory fields required for protocol establishment */ ++ u64 protocol_ver_min; ++ u64 protocol_ver_max; ++ u64 
drv_cap_flags1; ++ u64 drv_cap_flags2; ++ u64 drv_cap_flags3; ++ u64 drv_cap_flags4; ++ ++ /* Advisory fields */ ++ u64 drv_ver; ++ u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */ ++ u32 reserved; ++ u32 os_ver_major; ++ u32 os_ver_minor; ++ u32 os_ver_build; ++ u32 os_ver_platform; ++ u64 reserved_2; ++ u8 os_ver_str1[128]; ++ u8 os_ver_str2[128]; ++ u8 os_ver_str3[128]; ++ u8 os_ver_str4[128]; ++}; /* HW DATA */ ++ ++struct gdma_verify_ver_resp { ++ struct gdma_resp_hdr hdr; ++ u64 gdma_protocol_ver; ++ u64 pf_cap_flags1; ++ u64 pf_cap_flags2; ++ u64 pf_cap_flags3; ++ u64 pf_cap_flags4; ++}; /* HW DATA */ ++ ++/* GDMA_QUERY_MAX_RESOURCES */ ++struct gdma_query_max_resources_resp { ++ struct gdma_resp_hdr hdr; ++ u32 status; ++ u32 max_sq; ++ u32 max_rq; ++ u32 max_cq; ++ u32 max_eq; ++ u32 max_db; ++ u32 max_mst; ++ u32 max_cq_mod_ctx; ++ u32 max_mod_cq; ++ u32 max_msix; ++}; /* HW DATA */ ++ ++/* GDMA_LIST_DEVICES */ ++struct gdma_list_devices_resp { ++ struct gdma_resp_hdr hdr; ++ u32 num_of_devs; ++ u32 reserved; ++ struct gdma_dev_id devs[64]; ++}; /* HW DATA */ ++ ++/* GDMA_REGISTER_DEVICE */ ++struct gdma_register_device_resp { ++ struct gdma_resp_hdr hdr; ++ u32 pdid; ++ u32 gpa_mkey; ++ u32 db_id; ++}; /* HW DATA */ ++ ++/* GDMA_CREATE_QUEUE */ ++struct gdma_create_queue_req { ++ struct gdma_req_hdr hdr; ++ u32 type; ++ u32 reserved1; ++ u32 pdid; ++ u32 doolbell_id; ++ u64 gdma_region; ++ u32 reserved2; ++ u32 queue_size; ++ u32 log2_throttle_limit; ++ u32 eq_pci_msix_index; ++ u32 cq_mod_ctx_id; ++ u32 cq_parent_eq_id; ++ u8 rq_drop_on_overrun; ++ u8 rq_err_on_wqe_overflow; ++ u8 rq_chain_rec_wqes; ++ u8 sq_hw_db; ++ u32 reserved3; ++}; /* HW DATA */ ++ ++struct gdma_create_queue_resp { ++ struct gdma_resp_hdr hdr; ++ u32 queue_index; ++}; /* HW DATA */ ++ ++/* GDMA_DISABLE_QUEUE */ ++struct gdma_disable_queue_req { ++ struct gdma_req_hdr hdr; ++ u32 type; ++ u32 queue_index; ++ u32 alloc_res_id_on_creation; ++}; /* HW DATA */ ++ ++/* GDMA_CREATE_DMA_REGION */ ++struct gdma_create_dma_region_req { ++ struct gdma_req_hdr hdr; ++ ++ /* The total size of the DMA region */ ++ u64 length; ++ ++ /* The offset in the first page */ ++ u32 offset_in_page; ++ ++ /* enum gdma_page_type */ ++ u32 gdma_page_type; ++ ++ /* The total number of pages */ ++ u32 page_count; ++ ++ /* If page_addr_list_len is smaller than page_count, ++ * the remaining page addresses will be added via the ++ * message GDMA_DMA_REGION_ADD_PAGES. 
++ */ ++ u32 page_addr_list_len; ++ u64 page_addr_list[]; ++}; /* HW DATA */ ++ ++struct gdma_create_dma_region_resp { ++ struct gdma_resp_hdr hdr; ++ u64 gdma_region; ++}; /* HW DATA */ ++ ++/* GDMA_DMA_REGION_ADD_PAGES */ ++struct gdma_dma_region_add_pages_req { ++ struct gdma_req_hdr hdr; ++ ++ u64 gdma_region; ++ ++ u32 page_addr_list_len; ++ u32 reserved3; ++ ++ u64 page_addr_list[]; ++}; /* HW DATA */ ++ ++/* GDMA_DESTROY_DMA_REGION */ ++struct gdma_destroy_dma_region_req { ++ struct gdma_req_hdr hdr; ++ ++ u64 gdma_region; ++}; /* HW DATA */ ++ ++int mana_gd_verify_vf_version(struct pci_dev *pdev); ++ ++int mana_gd_register_device(struct gdma_dev *gd); ++int mana_gd_deregister_device(struct gdma_dev *gd); ++ ++int mana_gd_post_work_request(struct gdma_queue *wq, ++ const struct gdma_wqe_request *wqe_req, ++ struct gdma_posted_wqe_info *wqe_info); ++ ++int mana_gd_post_and_ring(struct gdma_queue *queue, ++ const struct gdma_wqe_request *wqe, ++ struct gdma_posted_wqe_info *wqe_info); ++ ++int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r); ++void mana_gd_free_res_map(struct gdma_resource *r); ++ ++void mana_gd_wq_ring_doorbell(struct gdma_context *gc, ++ struct gdma_queue *queue); ++ ++int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, ++ struct gdma_mem_info *gmi); ++ ++void mana_gd_free_memory(struct gdma_mem_info *gmi); ++ ++int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, ++ u32 resp_len, void *resp); ++#endif /* _GDMA_H */ +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +new file mode 100644 +index 000000000000..2f87bf90f8ec +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -0,0 +1,1415 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++/* Copyright (c) 2021, Microsoft Corporation. */ ++ ++#include ++#include ++ ++#include "mana.h" ++ ++static u32 mana_gd_r32(struct gdma_context *g, u64 offset) ++{ ++ return readl(g->bar0_va + offset); ++} ++ ++static u64 mana_gd_r64(struct gdma_context *g, u64 offset) ++{ ++ return readq(g->bar0_va + offset); ++} ++ ++static void mana_gd_init_registers(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ ++ gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; ++ ++ gc->db_page_base = gc->bar0_va + ++ mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); ++ ++ gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); ++} ++ ++static int mana_gd_query_max_resources(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct gdma_query_max_resources_resp resp = {}; ++ struct gdma_general_req req = {}; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES, ++ sizeof(req), sizeof(resp)); ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "Failed to query resource info: %d, 0x%x\n", ++ err, resp.hdr.status); ++ return err ? 
err : -EPROTO; ++ } ++ ++ if (gc->num_msix_usable > resp.max_msix) ++ gc->num_msix_usable = resp.max_msix; ++ ++ if (gc->num_msix_usable <= 1) ++ return -ENOSPC; ++ ++ gc->max_num_queues = num_online_cpus(); ++ if (gc->max_num_queues > MANA_MAX_NUM_QUEUES) ++ gc->max_num_queues = MANA_MAX_NUM_QUEUES; ++ ++ if (gc->max_num_queues > resp.max_eq) ++ gc->max_num_queues = resp.max_eq; ++ ++ if (gc->max_num_queues > resp.max_cq) ++ gc->max_num_queues = resp.max_cq; ++ ++ if (gc->max_num_queues > resp.max_sq) ++ gc->max_num_queues = resp.max_sq; ++ ++ if (gc->max_num_queues > resp.max_rq) ++ gc->max_num_queues = resp.max_rq; ++ ++ return 0; ++} ++ ++static int mana_gd_detect_devices(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct gdma_list_devices_resp resp = {}; ++ struct gdma_general_req req = {}; ++ struct gdma_dev_id dev; ++ u32 i, max_num_devs; ++ u16 dev_type; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), ++ sizeof(resp)); ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "Failed to detect devices: %d, 0x%x\n", err, ++ resp.hdr.status); ++ return err ? err : -EPROTO; ++ } ++ ++ max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); ++ ++ for (i = 0; i < max_num_devs; i++) { ++ dev = resp.devs[i]; ++ dev_type = dev.type; ++ ++ /* HWC is already detected in mana_hwc_create_channel(). */ ++ if (dev_type == GDMA_DEVICE_HWC) ++ continue; ++ ++ if (dev_type == GDMA_DEVICE_MANA) { ++ gc->mana.gdma_context = gc; ++ gc->mana.dev_id = dev; ++ } ++ } ++ ++ return gc->mana.dev_id.type == 0 ? -ENODEV : 0; ++} ++ ++int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, ++ u32 resp_len, void *resp) ++{ ++ struct hw_channel_context *hwc = gc->hwc.driver_data; ++ ++ return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); ++} ++ ++int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, ++ struct gdma_mem_info *gmi) ++{ ++ dma_addr_t dma_handle; ++ void *buf; ++ ++ if (length < PAGE_SIZE || !is_power_of_2(length)) ++ return -EINVAL; ++ ++ gmi->dev = gc->dev; ++ buf = dma_alloc_coherent(gmi->dev, length, &dma_handle, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ gmi->dma_handle = dma_handle; ++ gmi->virt_addr = buf; ++ gmi->length = length; ++ ++ return 0; ++} ++ ++void mana_gd_free_memory(struct gdma_mem_info *gmi) ++{ ++ dma_free_coherent(gmi->dev, gmi->length, gmi->virt_addr, ++ gmi->dma_handle); ++} ++ ++static int mana_gd_create_hw_eq(struct gdma_context *gc, ++ struct gdma_queue *queue) ++{ ++ struct gdma_create_queue_resp resp = {}; ++ struct gdma_create_queue_req req = {}; ++ int err; ++ ++ if (queue->type != GDMA_EQ) ++ return -EINVAL; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE, ++ sizeof(req), sizeof(resp)); ++ ++ req.hdr.dev_id = queue->gdma_dev->dev_id; ++ req.type = queue->type; ++ req.pdid = queue->gdma_dev->pdid; ++ req.doolbell_id = queue->gdma_dev->doorbell; ++ req.gdma_region = queue->mem_info.gdma_region; ++ req.queue_size = queue->queue_size; ++ req.log2_throttle_limit = queue->eq.log2_throttle_limit; ++ req.eq_pci_msix_index = queue->eq.msix_index; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "Failed to create queue: %d, 0x%x\n", err, ++ resp.hdr.status); ++ return err ? 
err : -EPROTO; ++ } ++ ++ queue->id = resp.queue_index; ++ queue->eq.disable_needed = true; ++ queue->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ return 0; ++} ++ ++static int mana_gd_disable_queue(struct gdma_queue *queue) ++{ ++ struct gdma_context *gc = queue->gdma_dev->gdma_context; ++ struct gdma_disable_queue_req req = {}; ++ struct gdma_general_resp resp = {}; ++ int err; ++ ++ WARN_ON(queue->type != GDMA_EQ); ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE, ++ sizeof(req), sizeof(resp)); ++ ++ req.hdr.dev_id = queue->gdma_dev->dev_id; ++ req.type = queue->type; ++ req.queue_index = queue->id; ++ req.alloc_res_id_on_creation = 1; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, ++ resp.hdr.status); ++ return err ? err : -EPROTO; ++ } ++ ++ return 0; ++} ++ ++#define DOORBELL_OFFSET_SQ 0x0 ++#define DOORBELL_OFFSET_RQ 0x400 ++#define DOORBELL_OFFSET_CQ 0x800 ++#define DOORBELL_OFFSET_EQ 0xFF8 ++ ++static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index, ++ enum gdma_queue_type q_type, u32 qid, ++ u32 tail_ptr, u8 num_req) ++{ ++ void __iomem *addr = gc->db_page_base + gc->db_page_size * db_index; ++ union gdma_doorbell_entry e = {}; ++ ++ switch (q_type) { ++ case GDMA_EQ: ++ e.eq.id = qid; ++ e.eq.tail_ptr = tail_ptr; ++ e.eq.arm = num_req; ++ ++ addr += DOORBELL_OFFSET_EQ; ++ break; ++ ++ case GDMA_CQ: ++ e.cq.id = qid; ++ e.cq.tail_ptr = tail_ptr; ++ e.cq.arm = num_req; ++ ++ addr += DOORBELL_OFFSET_CQ; ++ break; ++ ++ case GDMA_RQ: ++ e.rq.id = qid; ++ e.rq.tail_ptr = tail_ptr; ++ e.rq.wqe_cnt = num_req; ++ ++ addr += DOORBELL_OFFSET_RQ; ++ break; ++ ++ case GDMA_SQ: ++ e.sq.id = qid; ++ e.sq.tail_ptr = tail_ptr; ++ ++ addr += DOORBELL_OFFSET_SQ; ++ break; ++ ++ default: ++ WARN_ON(1); ++ return; ++ } ++ ++ /* Ensure all writes are done before ring doorbell */ ++ wmb(); ++ ++ writeq(e.as_uint64, addr); ++} ++ ++void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) ++{ ++ mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, ++ queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); ++} ++ ++void mana_gd_arm_cq(struct gdma_queue *cq) ++{ ++ struct gdma_context *gc = cq->gdma_dev->gdma_context; ++ ++ u32 num_cqe = cq->queue_size / GDMA_CQE_SIZE; ++ ++ u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS); ++ ++ mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, ++ head, SET_ARM_BIT); ++} ++ ++static void mana_gd_process_eqe(struct gdma_queue *eq) ++{ ++ u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); ++ struct gdma_context *gc = eq->gdma_dev->gdma_context; ++ struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; ++ union gdma_eqe_info eqe_info; ++ enum gdma_eqe_type type; ++ struct gdma_event event; ++ struct gdma_queue *cq; ++ struct gdma_eqe *eqe; ++ u32 cq_id; ++ ++ eqe = &eq_eqe_ptr[head]; ++ eqe_info.as_uint32 = eqe->eqe_info; ++ type = eqe_info.type; ++ ++ switch (type) { ++ case GDMA_EQE_COMPLETION: ++ cq_id = eqe->details[0] & 0xFFFFFF; ++ if (WARN_ON_ONCE(cq_id >= gc->max_num_cqs)) ++ break; ++ ++ cq = gc->cq_table[cq_id]; ++ if (WARN_ON_ONCE(!cq || cq->type != GDMA_CQ || cq->id != cq_id)) ++ break; ++ ++ if (cq->cq.callback) ++ cq->cq.callback(cq->cq.context, cq); ++ ++ break; ++ ++ case GDMA_EQE_TEST_EVENT: ++ gc->test_event_eq_id = eq->id; ++ complete(&gc->eq_test_event); ++ break; ++ ++ case GDMA_EQE_HWC_INIT_EQ_ID_DB: ++ case GDMA_EQE_HWC_INIT_DATA: ++ case 
GDMA_EQE_HWC_INIT_DONE: ++ if (!eq->eq.callback) ++ break; ++ ++ event.type = type; ++ memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE); ++ eq->eq.callback(eq->eq.context, eq, &event); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++static void mana_gd_process_eq_events(void *arg) ++{ ++ u32 owner_bits, new_bits, old_bits; ++ union gdma_eqe_info eqe_info; ++ struct gdma_eqe *eq_eqe_ptr; ++ struct gdma_queue *eq = arg; ++ struct gdma_context *gc; ++ struct gdma_eqe *eqe; ++ unsigned int arm_bit; ++ u32 head, num_eqe; ++ int i; ++ ++ gc = eq->gdma_dev->gdma_context; ++ ++ num_eqe = eq->queue_size / GDMA_EQE_SIZE; ++ eq_eqe_ptr = eq->queue_mem_ptr; ++ ++ /* Process up to 5 EQEs at a time, and update the HW head. */ ++ for (i = 0; i < 5; i++) { ++ eqe = &eq_eqe_ptr[eq->head % num_eqe]; ++ eqe_info.as_uint32 = eqe->eqe_info; ++ owner_bits = eqe_info.owner_bits; ++ ++ old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK; ++ /* No more entries */ ++ if (owner_bits == old_bits) ++ break; ++ ++ new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK; ++ if (owner_bits != new_bits) { ++ dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id); ++ break; ++ } ++ ++ mana_gd_process_eqe(eq); ++ ++ eq->head++; ++ } ++ ++ /* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */ ++ if (mana_gd_is_hwc(eq->gdma_dev)) { ++ arm_bit = SET_ARM_BIT; ++ } else if (eq->eq.work_done < eq->eq.budget && ++ napi_complete_done(&eq->eq.napi, eq->eq.work_done)) { ++ arm_bit = SET_ARM_BIT; ++ } else { ++ arm_bit = 0; ++ } ++ ++ head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS); ++ ++ mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id, ++ head, arm_bit); ++} ++ ++static int mana_poll(struct napi_struct *napi, int budget) ++{ ++ struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi); ++ ++ eq->eq.work_done = 0; ++ eq->eq.budget = budget; ++ ++ mana_gd_process_eq_events(eq); ++ ++ return min(eq->eq.work_done, budget); ++} ++ ++static void mana_gd_schedule_napi(void *arg) ++{ ++ struct gdma_queue *eq = arg; ++ struct napi_struct *napi; ++ ++ napi = &eq->eq.napi; ++ napi_schedule_irqoff(napi); ++} ++ ++static int mana_gd_register_irq(struct gdma_queue *queue, ++ const struct gdma_queue_spec *spec) ++{ ++ struct gdma_dev *gd = queue->gdma_dev; ++ bool is_mana = mana_gd_is_mana(gd); ++ struct gdma_irq_context *gic; ++ struct gdma_context *gc; ++ struct gdma_resource *r; ++ unsigned int msi_index; ++ unsigned long flags; ++ int err; ++ ++ gc = gd->gdma_context; ++ r = &gc->msix_resource; ++ ++ spin_lock_irqsave(&r->lock, flags); ++ ++ msi_index = find_first_zero_bit(r->map, r->size); ++ if (msi_index >= r->size) { ++ err = -ENOSPC; ++ } else { ++ bitmap_set(r->map, msi_index, 1); ++ queue->eq.msix_index = msi_index; ++ err = 0; ++ } ++ ++ spin_unlock_irqrestore(&r->lock, flags); ++ ++ if (err) ++ return err; ++ ++ WARN_ON(msi_index >= gc->num_msix_usable); ++ ++ gic = &gc->irq_contexts[msi_index]; ++ ++ if (is_mana) { ++ netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll, ++ NAPI_POLL_WEIGHT); ++ napi_enable(&queue->eq.napi); ++ } ++ ++ WARN_ON(gic->handler || gic->arg); ++ ++ gic->arg = queue; ++ ++ if (is_mana) ++ gic->handler = mana_gd_schedule_napi; ++ else ++ gic->handler = mana_gd_process_eq_events; ++ ++ return 0; ++} ++ ++static void mana_gd_deregiser_irq(struct gdma_queue *queue) ++{ ++ struct gdma_dev *gd = queue->gdma_dev; ++ struct gdma_irq_context *gic; ++ struct gdma_context *gc; ++ struct gdma_resource *r; ++ unsigned int msix_index; ++ unsigned 
long flags; ++ ++ gc = gd->gdma_context; ++ r = &gc->msix_resource; ++ ++ /* At most num_online_cpus() + 1 interrupts are used. */ ++ msix_index = queue->eq.msix_index; ++ if (WARN_ON(msix_index >= gc->num_msix_usable)) ++ return; ++ ++ gic = &gc->irq_contexts[msix_index]; ++ gic->handler = NULL; ++ gic->arg = NULL; ++ ++ spin_lock_irqsave(&r->lock, flags); ++ bitmap_clear(r->map, msix_index, 1); ++ spin_unlock_irqrestore(&r->lock, flags); ++ ++ queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; ++} ++ ++int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) ++{ ++ struct gdma_generate_test_event_req req = {}; ++ struct gdma_general_resp resp = {}; ++ struct device *dev = gc->dev; ++ int err; ++ ++ mutex_lock(&gc->eq_test_event_mutex); ++ ++ init_completion(&gc->eq_test_event); ++ gc->test_event_eq_id = INVALID_QUEUE_ID; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE, ++ sizeof(req), sizeof(resp)); ++ ++ req.hdr.dev_id = eq->gdma_dev->dev_id; ++ req.queue_index = eq->id; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err) { ++ dev_err(dev, "test_eq failed: %d\n", err); ++ goto out; ++ } ++ ++ err = -EPROTO; ++ ++ if (resp.hdr.status) { ++ dev_err(dev, "test_eq failed: 0x%x\n", resp.hdr.status); ++ goto out; ++ } ++ ++ if (!wait_for_completion_timeout(&gc->eq_test_event, 30 * HZ)) { ++ dev_err(dev, "test_eq timed out on queue %d\n", eq->id); ++ goto out; ++ } ++ ++ if (eq->id != gc->test_event_eq_id) { ++ dev_err(dev, "test_eq got an event on wrong queue %d (%d)\n", ++ gc->test_event_eq_id, eq->id); ++ goto out; ++ } ++ ++ err = 0; ++out: ++ mutex_unlock(&gc->eq_test_event_mutex); ++ return err; ++} ++ ++static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, ++ struct gdma_queue *queue) ++{ ++ int err; ++ ++ if (flush_evenets) { ++ err = mana_gd_test_eq(gc, queue); ++ if (err) ++ dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); ++ } ++ ++ mana_gd_deregiser_irq(queue); ++ ++ if (mana_gd_is_mana(queue->gdma_dev)) { ++ napi_disable(&queue->eq.napi); ++ netif_napi_del(&queue->eq.napi); ++ } ++ ++ if (queue->eq.disable_needed) ++ mana_gd_disable_queue(queue); ++} ++ ++static int mana_gd_create_eq(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ bool create_hwq, struct gdma_queue *queue) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct device *dev = gc->dev; ++ u32 log2_num_entries; ++ int err; ++ ++ queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; ++ ++ log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE); ++ ++ if (spec->eq.log2_throttle_limit > log2_num_entries) { ++ dev_err(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n", ++ spec->eq.log2_throttle_limit, log2_num_entries); ++ return -EINVAL; ++ } ++ ++ err = mana_gd_register_irq(queue, spec); ++ if (err) { ++ dev_err(dev, "Failed to register irq: %d\n", err); ++ return err; ++ } ++ ++ queue->eq.callback = spec->eq.callback; ++ queue->eq.context = spec->eq.context; ++ queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); ++ queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1; ++ ++ if (create_hwq) { ++ err = mana_gd_create_hw_eq(gc, queue); ++ if (err) ++ goto out; ++ ++ err = mana_gd_test_eq(gc, queue); ++ if (err) ++ goto out; ++ } ++ ++ return 0; ++out: ++ dev_err(dev, "Failed to create EQ: %d\n", err); ++ mana_gd_destroy_eq(gc, false, queue); ++ return err; ++} ++ ++static void mana_gd_create_cq(const struct gdma_queue_spec *spec, ++ struct gdma_queue *queue) ++{ ++ u32 log2_num_entries = 
ilog2(spec->queue_size / GDMA_CQE_SIZE); ++ ++ queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); ++ queue->cq.parent = spec->cq.parent_eq; ++ queue->cq.context = spec->cq.context; ++ queue->cq.callback = spec->cq.callback; ++} ++ ++static void mana_gd_destroy_cq(struct gdma_context *gc, ++ struct gdma_queue *queue) ++{ ++ u32 id = queue->id; ++ ++ if (id >= gc->max_num_cqs) ++ return; ++ ++ if (!gc->cq_table[id]) ++ return; ++ ++ gc->cq_table[id] = NULL; ++} ++ ++int mana_gd_create_hwc_queue(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ struct gdma_queue **queue_ptr) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct gdma_mem_info *gmi; ++ struct gdma_queue *queue; ++ int err; ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ if (!queue) ++ return -ENOMEM; ++ ++ gmi = &queue->mem_info; ++ err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); ++ if (err) ++ goto free_q; ++ ++ queue->head = 0; ++ queue->tail = 0; ++ queue->queue_mem_ptr = gmi->virt_addr; ++ queue->queue_size = spec->queue_size; ++ queue->monitor_avl_buf = spec->monitor_avl_buf; ++ queue->type = spec->type; ++ queue->gdma_dev = gd; ++ ++ if (spec->type == GDMA_EQ) ++ err = mana_gd_create_eq(gd, spec, false, queue); ++ else if (spec->type == GDMA_CQ) ++ mana_gd_create_cq(spec, queue); ++ ++ if (err) ++ goto out; ++ ++ *queue_ptr = queue; ++ return 0; ++out: ++ mana_gd_free_memory(gmi); ++free_q: ++ kfree(queue); ++ return err; ++} ++ ++static void mana_gd_destroy_dma_region(struct gdma_context *gc, u64 gdma_region) ++{ ++ struct gdma_destroy_dma_region_req req = {}; ++ struct gdma_general_resp resp = {}; ++ int err; ++ ++ if (gdma_region == GDMA_INVALID_DMA_REGION) ++ return; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req), ++ sizeof(resp)); ++ req.gdma_region = gdma_region; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) ++ dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", ++ err, resp.hdr.status); ++} ++ ++static int mana_gd_create_dma_region(struct gdma_dev *gd, ++ struct gdma_mem_info *gmi) ++{ ++ unsigned int num_page = gmi->length / PAGE_SIZE; ++ struct gdma_create_dma_region_req *req = NULL; ++ struct gdma_create_dma_region_resp resp = {}; ++ struct gdma_context *gc = gd->gdma_context; ++ struct hw_channel_context *hwc; ++ u32 length = gmi->length; ++ u32 req_msg_size; ++ int err; ++ int i; ++ ++ if (length < PAGE_SIZE || !is_power_of_2(length)) ++ return -EINVAL; ++ ++ if (offset_in_page(gmi->virt_addr) != 0) ++ return -EINVAL; ++ ++ hwc = gc->hwc.driver_data; ++ req_msg_size = sizeof(*req) + num_page * sizeof(u64); ++ if (req_msg_size > hwc->max_req_msg_size) ++ return -EINVAL; ++ ++ req = kzalloc(req_msg_size, GFP_KERNEL); ++ if (!req) ++ return -ENOMEM; ++ ++ mana_gd_init_req_hdr(&req->hdr, GDMA_CREATE_DMA_REGION, ++ req_msg_size, sizeof(resp)); ++ req->length = length; ++ req->offset_in_page = 0; ++ req->gdma_page_type = GDMA_PAGE_TYPE_4K; ++ req->page_count = num_page; ++ req->page_addr_list_len = num_page; ++ ++ for (i = 0; i < num_page; i++) ++ req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; ++ ++ err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); ++ if (err) ++ goto out; ++ ++ if (resp.hdr.status || resp.gdma_region == GDMA_INVALID_DMA_REGION) { ++ dev_err(gc->dev, "Failed to create DMA region: 0x%x\n", ++ resp.hdr.status); ++ err = -EPROTO; ++ goto out; ++ } ++ ++ gmi->gdma_region = resp.gdma_region; ++out: ++ kfree(req); ++ return err; ++} ++ 
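+/* A note on the queue-creation pattern used above and below: the caller
+ * fills a struct gdma_queue_spec and the helper allocates the queue memory,
+ * creates a DMA region for it and, for an EQ, claims an MSI-X vector and
+ * sets up NAPI. A minimal sketch of such a caller, assuming hypothetical
+ * sample_* names (the real callers are added later in this patch, in
+ * mana_en.c), might look like:
+ *
+ *	static void sample_eq_handler(void *context, struct gdma_queue *q,
+ *				      struct gdma_event *event)
+ *	{
+ *		// A real client dispatches on event->type here.
+ *	}
+ *
+ *	static int sample_create_eq(struct gdma_dev *gd,
+ *				    struct net_device *ndev,
+ *				    struct gdma_queue **eq)
+ *	{
+ *		struct gdma_queue_spec spec = {};
+ *
+ *		spec.type = GDMA_EQ;
+ *		spec.monitor_avl_buf = false;
+ *		spec.queue_size = 8 * PAGE_SIZE;  // power of two, >= PAGE_SIZE
+ *		spec.eq.callback = sample_eq_handler;
+ *		spec.eq.context = ndev;
+ *		spec.eq.log2_throttle_limit = 4;  // placeholder throttle value
+ *		spec.eq.ndev = ndev;              // consumed by netif_napi_add()
+ *
+ *		return mana_gd_create_mana_eq(gd, &spec, eq);
+ *	}
+ */
+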
++int mana_gd_create_mana_eq(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ struct gdma_queue **queue_ptr) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct gdma_mem_info *gmi; ++ struct gdma_queue *queue; ++ int err; ++ ++ if (spec->type != GDMA_EQ) ++ return -EINVAL; ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ if (!queue) ++ return -ENOMEM; ++ ++ gmi = &queue->mem_info; ++ err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); ++ if (err) ++ goto free_q; ++ ++ err = mana_gd_create_dma_region(gd, gmi); ++ if (err) ++ goto out; ++ ++ queue->head = 0; ++ queue->tail = 0; ++ queue->queue_mem_ptr = gmi->virt_addr; ++ queue->queue_size = spec->queue_size; ++ queue->monitor_avl_buf = spec->monitor_avl_buf; ++ queue->type = spec->type; ++ queue->gdma_dev = gd; ++ ++ err = mana_gd_create_eq(gd, spec, true, queue); ++ if (err) ++ goto out; ++ ++ *queue_ptr = queue; ++ return 0; ++out: ++ mana_gd_free_memory(gmi); ++free_q: ++ kfree(queue); ++ return err; ++} ++ ++int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, ++ const struct gdma_queue_spec *spec, ++ struct gdma_queue **queue_ptr) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct gdma_mem_info *gmi; ++ struct gdma_queue *queue; ++ int err; ++ ++ if (spec->type != GDMA_CQ && spec->type != GDMA_SQ && ++ spec->type != GDMA_RQ) ++ return -EINVAL; ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ if (!queue) ++ return -ENOMEM; ++ ++ gmi = &queue->mem_info; ++ err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); ++ if (err) ++ goto free_q; ++ ++ err = mana_gd_create_dma_region(gd, gmi); ++ if (err) ++ goto out; ++ ++ queue->head = 0; ++ queue->tail = 0; ++ queue->queue_mem_ptr = gmi->virt_addr; ++ queue->queue_size = spec->queue_size; ++ queue->monitor_avl_buf = spec->monitor_avl_buf; ++ queue->type = spec->type; ++ queue->gdma_dev = gd; ++ ++ if (spec->type == GDMA_CQ) ++ mana_gd_create_cq(spec, queue); ++ ++ *queue_ptr = queue; ++ return 0; ++out: ++ mana_gd_free_memory(gmi); ++free_q: ++ kfree(queue); ++ return err; ++} ++ ++void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) ++{ ++ struct gdma_mem_info *gmi = &queue->mem_info; ++ ++ switch (queue->type) { ++ case GDMA_EQ: ++ mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue); ++ break; ++ ++ case GDMA_CQ: ++ mana_gd_destroy_cq(gc, queue); ++ break; ++ ++ case GDMA_RQ: ++ break; ++ ++ case GDMA_SQ: ++ break; ++ ++ default: ++ dev_err(gc->dev, "Can't destroy unknown queue: type=%d\n", ++ queue->type); ++ return; ++ } ++ ++ mana_gd_destroy_dma_region(gc, gmi->gdma_region); ++ mana_gd_free_memory(gmi); ++ kfree(queue); ++} ++ ++int mana_gd_verify_vf_version(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct gdma_verify_ver_resp resp = {}; ++ struct gdma_verify_ver_req req = {}; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION, ++ sizeof(req), sizeof(resp)); ++ ++ req.protocol_ver_min = GDMA_PROTOCOL_FIRST; ++ req.protocol_ver_max = GDMA_PROTOCOL_LAST; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n", ++ err, resp.hdr.status); ++ return err ? 
err : -EPROTO; ++ } ++ ++ return 0; ++} ++ ++int mana_gd_register_device(struct gdma_dev *gd) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct gdma_register_device_resp resp = {}; ++ struct gdma_general_req req = {}; ++ int err; ++ ++ gd->pdid = INVALID_PDID; ++ gd->doorbell = INVALID_DOORBELL; ++ gd->gpa_mkey = INVALID_MEM_KEY; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req), ++ sizeof(resp)); ++ ++ req.hdr.dev_id = gd->dev_id; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n", ++ err, resp.hdr.status); ++ return err ? err : -EPROTO; ++ } ++ ++ gd->pdid = resp.pdid; ++ gd->gpa_mkey = resp.gpa_mkey; ++ gd->doorbell = resp.db_id; ++ ++ return 0; ++} ++ ++int mana_gd_deregister_device(struct gdma_dev *gd) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct gdma_general_resp resp = {}; ++ struct gdma_general_req req = {}; ++ int err; ++ ++ if (gd->pdid == INVALID_PDID) ++ return -EINVAL; ++ ++ mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req), ++ sizeof(resp)); ++ ++ req.hdr.dev_id = gd->dev_id; ++ ++ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); ++ if (err || resp.hdr.status) { ++ dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", ++ err, resp.hdr.status); ++ if (!err) ++ err = -EPROTO; ++ } ++ ++ gd->pdid = INVALID_PDID; ++ gd->doorbell = INVALID_DOORBELL; ++ gd->gpa_mkey = INVALID_MEM_KEY; ++ ++ return err; ++} ++ ++u32 mana_gd_wq_avail_space(struct gdma_queue *wq) ++{ ++ u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE; ++ u32 wq_size = wq->queue_size; ++ ++ WARN_ON_ONCE(used_space > wq_size); ++ ++ return wq_size - used_space; ++} ++ ++u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset) ++{ ++ u32 offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1); ++ ++ WARN_ON_ONCE((offset + GDMA_WQE_BU_SIZE) > wq->queue_size); ++ ++ return wq->queue_mem_ptr + offset; ++} ++ ++static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req, ++ enum gdma_queue_type q_type, ++ u32 client_oob_size, u32 sgl_data_size, ++ u8 *wqe_ptr) ++{ ++ bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL); ++ bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0); ++ struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr; ++ u8 *ptr; ++ ++ memset(header, 0, sizeof(struct gdma_wqe)); ++ header->num_sge = wqe_req->num_sge; ++ header->inline_oob_size_div4 = client_oob_size / sizeof(u32); ++ ++ if (oob_in_sgl) { ++ WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2); ++ ++ header->client_oob_in_sgl = 1; ++ ++ if (pad_data) ++ header->last_vbytes = wqe_req->sgl[0].size; ++ } ++ ++ if (q_type == GDMA_SQ) ++ header->client_data_unit = wqe_req->client_data_unit; ++ ++ /* The size of gdma_wqe + client_oob_size must be less than or equal ++ * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond ++ * the queue memory buffer boundary. 
++ */ ++ ptr = wqe_ptr + sizeof(header); ++ ++ if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) { ++ memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size); ++ ++ if (client_oob_size > wqe_req->inline_oob_size) ++ memset(ptr + wqe_req->inline_oob_size, 0, ++ client_oob_size - wqe_req->inline_oob_size); ++ } ++ ++ return sizeof(header) + client_oob_size; ++} ++ ++static void mana_gd_write_sgl(struct gdma_queue *wq, u8 *wqe_ptr, ++ const struct gdma_wqe_request *wqe_req) ++{ ++ u32 sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge; ++ const u8 *address = (u8 *)wqe_req->sgl; ++ u8 *base_ptr, *end_ptr; ++ u32 size_to_end; ++ ++ base_ptr = wq->queue_mem_ptr; ++ end_ptr = base_ptr + wq->queue_size; ++ size_to_end = (u32)(end_ptr - wqe_ptr); ++ ++ if (size_to_end < sgl_size) { ++ memcpy(wqe_ptr, address, size_to_end); ++ ++ wqe_ptr = base_ptr; ++ address += size_to_end; ++ sgl_size -= size_to_end; ++ } ++ ++ memcpy(wqe_ptr, address, sgl_size); ++} ++ ++int mana_gd_post_work_request(struct gdma_queue *wq, ++ const struct gdma_wqe_request *wqe_req, ++ struct gdma_posted_wqe_info *wqe_info) ++{ ++ u32 client_oob_size = wqe_req->inline_oob_size; ++ struct gdma_context *gc; ++ u32 sgl_data_size; ++ u32 max_wqe_size; ++ u32 wqe_size; ++ u8 *wqe_ptr; ++ ++ if (wqe_req->num_sge == 0) ++ return -EINVAL; ++ ++ if (wq->type == GDMA_RQ) { ++ if (client_oob_size != 0) ++ return -EINVAL; ++ ++ client_oob_size = INLINE_OOB_SMALL_SIZE; ++ ++ max_wqe_size = GDMA_MAX_RQE_SIZE; ++ } else { ++ if (client_oob_size != INLINE_OOB_SMALL_SIZE && ++ client_oob_size != INLINE_OOB_LARGE_SIZE) ++ return -EINVAL; ++ ++ max_wqe_size = GDMA_MAX_SQE_SIZE; ++ } ++ ++ sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge; ++ wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size + ++ sgl_data_size, GDMA_WQE_BU_SIZE); ++ if (wqe_size > max_wqe_size) ++ return -EINVAL; ++ ++ if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) { ++ gc = wq->gdma_dev->gdma_context; ++ dev_err(gc->dev, "unsuccessful flow control!\n"); ++ return -ENOSPC; ++ } ++ ++ if (wqe_info) ++ wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE; ++ ++ wqe_ptr = mana_gd_get_wqe_ptr(wq, wq->head); ++ wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size, ++ sgl_data_size, wqe_ptr); ++ if (wqe_ptr >= (u8 *)wq->queue_mem_ptr + wq->queue_size) ++ wqe_ptr -= wq->queue_size; ++ ++ mana_gd_write_sgl(wq, wqe_ptr, wqe_req); ++ ++ wq->head += wqe_size / GDMA_WQE_BU_SIZE; ++ ++ return 0; ++} ++ ++int mana_gd_post_and_ring(struct gdma_queue *queue, ++ const struct gdma_wqe_request *wqe_req, ++ struct gdma_posted_wqe_info *wqe_info) ++{ ++ struct gdma_context *gc = queue->gdma_dev->gdma_context; ++ int err; ++ ++ err = mana_gd_post_work_request(queue, wqe_req, wqe_info); ++ if (err) ++ return err; ++ ++ mana_gd_wq_ring_doorbell(gc, queue); ++ ++ return 0; ++} ++ ++static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) ++{ ++ unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe); ++ struct gdma_cqe *cq_cqe = cq->queue_mem_ptr; ++ u32 owner_bits, new_bits, old_bits; ++ struct gdma_cqe *cqe; ++ ++ cqe = &cq_cqe[cq->head % num_cqe]; ++ owner_bits = cqe->cqe_info.owner_bits; ++ ++ old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK; ++ /* Return 0 if no more entries. */ ++ if (owner_bits == old_bits) ++ return 0; ++ ++ new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK; ++ /* Return -1 if overflow detected. 
*/ ++ if (owner_bits != new_bits) ++ return -1; ++ ++ comp->wq_num = cqe->cqe_info.wq_num; ++ comp->is_sq = cqe->cqe_info.is_sq; ++ memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); ++ ++ return 1; ++} ++ ++int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe) ++{ ++ int cqe_idx; ++ int ret; ++ ++ for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) { ++ ret = mana_gd_read_cqe(cq, &comp[cqe_idx]); ++ ++ if (ret < 0) { ++ cq->head -= cqe_idx; ++ return ret; ++ } ++ ++ if (ret == 0) ++ break; ++ ++ cq->head++; ++ } ++ ++ return cqe_idx; ++} ++ ++static irqreturn_t mana_gd_intr(int irq, void *arg) ++{ ++ struct gdma_irq_context *gic = arg; ++ ++ if (gic->handler) ++ gic->handler(gic->arg); ++ ++ return IRQ_HANDLED; ++} ++ ++int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r) ++{ ++ r->map = bitmap_zalloc(res_avail, GFP_KERNEL); ++ if (!r->map) ++ return -ENOMEM; ++ ++ r->size = res_avail; ++ spin_lock_init(&r->lock); ++ ++ return 0; ++} ++ ++void mana_gd_free_res_map(struct gdma_resource *r) ++{ ++ bitmap_free(r->map); ++ r->map = NULL; ++ r->size = 0; ++} ++ ++static int mana_gd_setup_irqs(struct pci_dev *pdev) ++{ ++ unsigned int max_queues_per_port = num_online_cpus(); ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct gdma_irq_context *gic; ++ unsigned int max_irqs; ++ int nvec, irq; ++ int err, i, j; ++ ++ if (max_queues_per_port > MANA_MAX_NUM_QUEUES) ++ max_queues_per_port = MANA_MAX_NUM_QUEUES; ++ ++ max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV; ++ ++ /* Need 1 interrupt for the Hardware communication Channel (HWC) */ ++ max_irqs++; ++ ++ nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX); ++ if (nvec < 0) ++ return nvec; ++ ++ gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context), ++ GFP_KERNEL); ++ if (!gc->irq_contexts) { ++ err = -ENOMEM; ++ goto free_irq_vector; ++ } ++ ++ for (i = 0; i < nvec; i++) { ++ gic = &gc->irq_contexts[i]; ++ gic->handler = NULL; ++ gic->arg = NULL; ++ ++ irq = pci_irq_vector(pdev, i); ++ if (irq < 0) { ++ err = irq; ++ goto free_irq; ++ } ++ ++ err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); ++ if (err) ++ goto free_irq; ++ } ++ ++ err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); ++ if (err) ++ goto free_irq; ++ ++ gc->max_num_msix = nvec; ++ gc->num_msix_usable = nvec; ++ ++ return 0; ++ ++free_irq: ++ for (j = i - 1; j >= 0; j--) { ++ irq = pci_irq_vector(pdev, j); ++ gic = &gc->irq_contexts[j]; ++ free_irq(irq, gic); ++ } ++ ++ kfree(gc->irq_contexts); ++ gc->irq_contexts = NULL; ++free_irq_vector: ++ pci_free_irq_vectors(pdev); ++ return err; ++} ++ ++static void mana_gd_remove_irqs(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ struct gdma_irq_context *gic; ++ int irq, i; ++ ++ if (gc->max_num_msix < 1) ++ return; ++ ++ mana_gd_free_res_map(&gc->msix_resource); ++ ++ for (i = 0; i < gc->max_num_msix; i++) { ++ irq = pci_irq_vector(pdev, i); ++ if (irq < 0) ++ continue; ++ ++ gic = &gc->irq_contexts[i]; ++ free_irq(irq, gic); ++ } ++ ++ pci_free_irq_vectors(pdev); ++ ++ gc->max_num_msix = 0; ++ gc->num_msix_usable = 0; ++ kfree(gc->irq_contexts); ++ gc->irq_contexts = NULL; ++} ++ ++static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ++{ ++ struct gdma_context *gc; ++ void __iomem *bar0_va; ++ int bar = 0; ++ int err; ++ ++ err = pci_enable_device(pdev); ++ if (err) ++ return -ENXIO; ++ ++ pci_set_master(pdev); ++ ++ err = pci_request_regions(pdev, "mana"); ++ if (err) ++ goto 
disable_dev; ++ ++ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); ++ if (err) ++ goto release_region; ++ ++ err = -ENOMEM; ++ gc = vzalloc(sizeof(*gc)); ++ if (!gc) ++ goto release_region; ++ ++ bar0_va = pci_iomap(pdev, bar, 0); ++ if (!bar0_va) ++ goto free_gc; ++ ++ gc->bar0_va = bar0_va; ++ gc->dev = &pdev->dev; ++ ++ pci_set_drvdata(pdev, gc); ++ ++ mana_gd_init_registers(pdev); ++ ++ mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); ++ ++ err = mana_gd_setup_irqs(pdev); ++ if (err) ++ goto unmap_bar; ++ ++ mutex_init(&gc->eq_test_event_mutex); ++ ++ err = mana_hwc_create_channel(gc); ++ if (err) ++ goto remove_irq; ++ ++ err = mana_gd_verify_vf_version(pdev); ++ if (err) ++ goto remove_irq; ++ ++ err = mana_gd_query_max_resources(pdev); ++ if (err) ++ goto remove_irq; ++ ++ err = mana_gd_detect_devices(pdev); ++ if (err) ++ goto remove_irq; ++ ++ err = mana_probe(&gc->mana); ++ if (err) ++ goto clean_up_gdma; ++ ++ return 0; ++ ++clean_up_gdma: ++ mana_hwc_destroy_channel(gc); ++ vfree(gc->cq_table); ++ gc->cq_table = NULL; ++remove_irq: ++ mana_gd_remove_irqs(pdev); ++unmap_bar: ++ pci_iounmap(pdev, bar0_va); ++free_gc: ++ vfree(gc); ++release_region: ++ pci_release_regions(pdev); ++disable_dev: ++ pci_clear_master(pdev); ++ pci_disable_device(pdev); ++ dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err); ++ return err; ++} ++ ++static void mana_gd_remove(struct pci_dev *pdev) ++{ ++ struct gdma_context *gc = pci_get_drvdata(pdev); ++ ++ mana_remove(&gc->mana); ++ ++ mana_hwc_destroy_channel(gc); ++ vfree(gc->cq_table); ++ gc->cq_table = NULL; ++ ++ mana_gd_remove_irqs(pdev); ++ ++ pci_iounmap(pdev, gc->bar0_va); ++ ++ vfree(gc); ++ ++ pci_release_regions(pdev); ++ pci_clear_master(pdev); ++ pci_disable_device(pdev); ++} ++ ++#ifndef PCI_VENDOR_ID_MICROSOFT ++#define PCI_VENDOR_ID_MICROSOFT 0x1414 ++#endif ++ ++static const struct pci_device_id mana_id_table[] = { ++ { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 0x00BA) }, ++ { } ++}; ++ ++static struct pci_driver mana_driver = { ++ .name = "mana", ++ .id_table = mana_id_table, ++ .probe = mana_gd_probe, ++ .remove = mana_gd_remove, ++}; ++ ++module_pci_driver(mana_driver); ++ ++MODULE_DEVICE_TABLE(pci, mana_id_table); ++ ++MODULE_LICENSE("Dual BSD/GPL"); ++MODULE_DESCRIPTION("Microsoft Azure Network Adapter driver"); +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +new file mode 100644 +index 000000000000..462bc577692a +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -0,0 +1,843 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++/* Copyright (c) 2021, Microsoft Corporation. 
*/ ++ ++#include "gdma.h" ++#include "hw_channel.h" ++ ++static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) ++{ ++ struct gdma_resource *r = &hwc->inflight_msg_res; ++ unsigned long flags; ++ u32 index; ++ ++ down(&hwc->sema); ++ ++ spin_lock_irqsave(&r->lock, flags); ++ ++ index = find_first_zero_bit(hwc->inflight_msg_res.map, ++ hwc->inflight_msg_res.size); ++ ++ bitmap_set(hwc->inflight_msg_res.map, index, 1); ++ ++ spin_unlock_irqrestore(&r->lock, flags); ++ ++ *msg_id = index; ++ ++ return 0; ++} ++ ++static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id) ++{ ++ struct gdma_resource *r = &hwc->inflight_msg_res; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&r->lock, flags); ++ bitmap_clear(hwc->inflight_msg_res.map, msg_id, 1); ++ spin_unlock_irqrestore(&r->lock, flags); ++ ++ up(&hwc->sema); ++} ++ ++static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, ++ const struct gdma_resp_hdr *resp_msg, ++ u32 resp_len) ++{ ++ if (resp_len < sizeof(*resp_msg)) ++ return -EPROTO; ++ ++ if (resp_len > caller_ctx->output_buflen) ++ return -EPROTO; ++ ++ return 0; ++} ++ ++static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, ++ const struct gdma_resp_hdr *resp_msg) ++{ ++ struct hwc_caller_ctx *ctx; ++ int err = -EPROTO; ++ ++ if (!test_bit(resp_msg->response.hwc_msg_id, ++ hwc->inflight_msg_res.map)) { ++ dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", ++ resp_msg->response.hwc_msg_id); ++ return; ++ } ++ ++ ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; ++ err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); ++ if (err) ++ goto out; ++ ++ ctx->status_code = resp_msg->status; ++ ++ memcpy(ctx->output_buf, resp_msg, resp_len); ++out: ++ ctx->error = err; ++ complete(&ctx->comp_event); ++} ++ ++static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, ++ struct hwc_work_request *req) ++{ ++ struct device *dev = hwc_rxq->hwc->dev; ++ struct gdma_sge *sge; ++ int err; ++ ++ sge = &req->sge; ++ sge->address = (u64)req->buf_sge_addr; ++ sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; ++ sge->size = req->buf_len; ++ ++ memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); ++ req->wqe_req.sgl = sge; ++ req->wqe_req.num_sge = 1; ++ req->wqe_req.client_data_unit = 0; ++ ++ err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); ++ if (err) ++ dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); ++ return err; ++} ++ ++static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, ++ struct gdma_event *event) ++{ ++ struct hw_channel_context *hwc = ctx; ++ struct gdma_dev *gd = hwc->gdma_dev; ++ union hwc_init_type_data type_data; ++ union hwc_init_eq_id_db eq_db; ++ u32 type, val; ++ ++ switch (event->type) { ++ case GDMA_EQE_HWC_INIT_EQ_ID_DB: ++ eq_db.as_uint32 = event->details[0]; ++ hwc->cq->gdma_eq->id = eq_db.eq_id; ++ gd->doorbell = eq_db.doorbell; ++ break; ++ ++ case GDMA_EQE_HWC_INIT_DATA: ++ type_data.as_uint32 = event->details[0]; ++ type = type_data.type; ++ val = type_data.value; ++ ++ switch (type) { ++ case HWC_INIT_DATA_CQID: ++ hwc->cq->gdma_cq->id = val; ++ break; ++ ++ case HWC_INIT_DATA_RQID: ++ hwc->rxq->gdma_wq->id = val; ++ break; ++ ++ case HWC_INIT_DATA_SQID: ++ hwc->txq->gdma_wq->id = val; ++ break; ++ ++ case HWC_INIT_DATA_QUEUE_DEPTH: ++ hwc->hwc_init_q_depth_max = (u16)val; ++ break; ++ ++ case HWC_INIT_DATA_MAX_REQUEST: ++ hwc->hwc_init_max_req_msg_size = val; ++ break; ++ ++ case HWC_INIT_DATA_MAX_RESPONSE: ++ 
hwc->hwc_init_max_resp_msg_size = val; ++ break; ++ ++ case HWC_INIT_DATA_MAX_NUM_CQS: ++ gd->gdma_context->max_num_cqs = val; ++ break; ++ ++ case HWC_INIT_DATA_PDID: ++ hwc->gdma_dev->pdid = val; ++ break; ++ ++ case HWC_INIT_DATA_GPA_MKEY: ++ hwc->rxq->msg_buf->gpa_mkey = val; ++ hwc->txq->msg_buf->gpa_mkey = val; ++ break; ++ } ++ ++ break; ++ ++ case GDMA_EQE_HWC_INIT_DONE: ++ complete(&hwc->hwc_init_eqe_comp); ++ break; ++ ++ default: ++ /* Ignore unknown events, which should never happen. */ ++ break; ++ } ++} ++ ++static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, ++ const struct hwc_rx_oob *rx_oob) ++{ ++ struct hw_channel_context *hwc = ctx; ++ struct hwc_wq *hwc_rxq = hwc->rxq; ++ struct hwc_work_request *rx_req; ++ struct gdma_resp_hdr *resp; ++ struct gdma_wqe *dma_oob; ++ struct gdma_queue *rq; ++ struct gdma_sge *sge; ++ u64 rq_base_addr; ++ u64 rx_req_idx; ++ u8 *wqe; ++ ++ if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id)) ++ return; ++ ++ rq = hwc_rxq->gdma_wq; ++ wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE); ++ dma_oob = (struct gdma_wqe *)wqe; ++ ++ sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4); ++ ++ /* Select the RX work request for virtual address and for reposting. */ ++ rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; ++ rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; ++ ++ rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; ++ resp = (struct gdma_resp_hdr *)rx_req->buf_va; ++ ++ if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { ++ dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", ++ resp->response.hwc_msg_id); ++ return; ++ } ++ ++ mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); ++ ++ /* Do no longer use 'resp', because the buffer is posted to the HW ++ * in the below mana_hwc_post_rx_wqe(). 
++ */ ++ resp = NULL; ++ ++ mana_hwc_post_rx_wqe(hwc_rxq, rx_req); ++} ++ ++static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, ++ const struct hwc_rx_oob *rx_oob) ++{ ++ struct hw_channel_context *hwc = ctx; ++ struct hwc_wq *hwc_txq = hwc->txq; ++ ++ WARN_ON_ONCE(!hwc_txq || hwc_txq->gdma_wq->id != gdma_txq_id); ++} ++ ++static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc, ++ enum gdma_queue_type type, u64 queue_size, ++ struct gdma_queue **queue) ++{ ++ struct gdma_queue_spec spec = {}; ++ ++ if (type != GDMA_SQ && type != GDMA_RQ) ++ return -EINVAL; ++ ++ spec.type = type; ++ spec.monitor_avl_buf = false; ++ spec.queue_size = queue_size; ++ ++ return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); ++} ++ ++static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc, ++ u64 queue_size, ++ void *ctx, gdma_cq_callback *cb, ++ struct gdma_queue *parent_eq, ++ struct gdma_queue **queue) ++{ ++ struct gdma_queue_spec spec = {}; ++ ++ spec.type = GDMA_CQ; ++ spec.monitor_avl_buf = false; ++ spec.queue_size = queue_size; ++ spec.cq.context = ctx; ++ spec.cq.callback = cb; ++ spec.cq.parent_eq = parent_eq; ++ ++ return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); ++} ++ ++static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc, ++ u64 queue_size, ++ void *ctx, gdma_eq_callback *cb, ++ struct gdma_queue **queue) ++{ ++ struct gdma_queue_spec spec = {}; ++ ++ spec.type = GDMA_EQ; ++ spec.monitor_avl_buf = false; ++ spec.queue_size = queue_size; ++ spec.eq.context = ctx; ++ spec.eq.callback = cb; ++ spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ; ++ ++ return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); ++} ++ ++static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) ++{ ++ struct hwc_rx_oob comp_data = {}; ++ struct gdma_comp *completions; ++ struct hwc_cq *hwc_cq = ctx; ++ u32 comp_read, i; ++ ++ WARN_ON_ONCE(hwc_cq->gdma_cq != q_self); ++ ++ completions = hwc_cq->comp_buf; ++ comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth); ++ WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth); ++ ++ for (i = 0; i < comp_read; ++i) { ++ comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data; ++ ++ if (completions[i].is_sq) ++ hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx, ++ completions[i].wq_num, ++ &comp_data); ++ else ++ hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx, ++ completions[i].wq_num, ++ &comp_data); ++ } ++ ++ mana_gd_arm_cq(q_self); ++} ++ ++static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq) ++{ ++ if (!hwc_cq) ++ return; ++ ++ kfree(hwc_cq->comp_buf); ++ ++ if (hwc_cq->gdma_cq) ++ mana_gd_destroy_queue(gc, hwc_cq->gdma_cq); ++ ++ if (hwc_cq->gdma_eq) ++ mana_gd_destroy_queue(gc, hwc_cq->gdma_eq); ++ ++ kfree(hwc_cq); ++} ++ ++static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, ++ gdma_eq_callback *callback, void *ctx, ++ hwc_rx_event_handler_t *rx_ev_hdlr, ++ void *rx_ev_ctx, ++ hwc_tx_event_handler_t *tx_ev_hdlr, ++ void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr) ++{ ++ struct gdma_queue *eq, *cq; ++ struct gdma_comp *comp_buf; ++ struct hwc_cq *hwc_cq; ++ u32 eq_size, cq_size; ++ int err; ++ ++ eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); ++ if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) ++ eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; ++ ++ cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); ++ if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) ++ cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; ++ ++ hwc_cq = 
kzalloc(sizeof(*hwc_cq), GFP_KERNEL); ++ if (!hwc_cq) ++ return -ENOMEM; ++ ++ err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq); ++ if (err) { ++ dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err); ++ goto out; ++ } ++ hwc_cq->gdma_eq = eq; ++ ++ err = mana_hwc_create_gdma_cq(hwc, cq_size, hwc_cq, mana_hwc_comp_event, ++ eq, &cq); ++ if (err) { ++ dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err); ++ goto out; ++ } ++ hwc_cq->gdma_cq = cq; ++ ++ comp_buf = kcalloc(q_depth, sizeof(struct gdma_comp), GFP_KERNEL); ++ if (!comp_buf) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ hwc_cq->hwc = hwc; ++ hwc_cq->comp_buf = comp_buf; ++ hwc_cq->queue_depth = q_depth; ++ hwc_cq->rx_event_handler = rx_ev_hdlr; ++ hwc_cq->rx_event_ctx = rx_ev_ctx; ++ hwc_cq->tx_event_handler = tx_ev_hdlr; ++ hwc_cq->tx_event_ctx = tx_ev_ctx; ++ ++ *hwc_cq_ptr = hwc_cq; ++ return 0; ++out: ++ mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); ++ return err; ++} ++ ++static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, ++ u32 max_msg_size, ++ struct hwc_dma_buf **dma_buf_ptr) ++{ ++ struct gdma_context *gc = hwc->gdma_dev->gdma_context; ++ struct hwc_work_request *hwc_wr; ++ struct hwc_dma_buf *dma_buf; ++ struct gdma_mem_info *gmi; ++ void *virt_addr; ++ u32 buf_size; ++ u8 *base_pa; ++ int err; ++ u16 i; ++ ++ dma_buf = kzalloc(sizeof(*dma_buf) + ++ q_depth * sizeof(struct hwc_work_request), ++ GFP_KERNEL); ++ if (!dma_buf) ++ return -ENOMEM; ++ ++ dma_buf->num_reqs = q_depth; ++ ++ buf_size = PAGE_ALIGN(q_depth * max_msg_size); ++ ++ gmi = &dma_buf->mem_info; ++ err = mana_gd_alloc_memory(gc, buf_size, gmi); ++ if (err) { ++ dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); ++ goto out; ++ } ++ ++ virt_addr = dma_buf->mem_info.virt_addr; ++ base_pa = (u8 *)dma_buf->mem_info.dma_handle; ++ ++ for (i = 0; i < q_depth; i++) { ++ hwc_wr = &dma_buf->reqs[i]; ++ ++ hwc_wr->buf_va = virt_addr + i * max_msg_size; ++ hwc_wr->buf_sge_addr = base_pa + i * max_msg_size; ++ ++ hwc_wr->buf_len = max_msg_size; ++ } ++ ++ *dma_buf_ptr = dma_buf; ++ return 0; ++out: ++ kfree(dma_buf); ++ return err; ++} ++ ++static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc, ++ struct hwc_dma_buf *dma_buf) ++{ ++ if (!dma_buf) ++ return; ++ ++ mana_gd_free_memory(&dma_buf->mem_info); ++ ++ kfree(dma_buf); ++} ++ ++static void mana_hwc_destroy_wq(struct hw_channel_context *hwc, ++ struct hwc_wq *hwc_wq) ++{ ++ if (!hwc_wq) ++ return; ++ ++ mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf); ++ ++ if (hwc_wq->gdma_wq) ++ mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, ++ hwc_wq->gdma_wq); ++ ++ kfree(hwc_wq); ++} ++ ++static int mana_hwc_create_wq(struct hw_channel_context *hwc, ++ enum gdma_queue_type q_type, u16 q_depth, ++ u32 max_msg_size, struct hwc_cq *hwc_cq, ++ struct hwc_wq **hwc_wq_ptr) ++{ ++ struct gdma_queue *queue; ++ struct hwc_wq *hwc_wq; ++ u32 queue_size; ++ int err; ++ ++ WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ); ++ ++ if (q_type == GDMA_RQ) ++ queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth); ++ else ++ queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); ++ ++ if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) ++ queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; ++ ++ hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); ++ if (!hwc_wq) ++ return -ENOMEM; ++ ++ err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue); ++ if (err) ++ goto out; ++ ++ err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, ++ 
&hwc_wq->msg_buf); ++ if (err) ++ goto out; ++ ++ hwc_wq->hwc = hwc; ++ hwc_wq->gdma_wq = queue; ++ hwc_wq->queue_depth = q_depth; ++ hwc_wq->hwc_cq = hwc_cq; ++ ++ *hwc_wq_ptr = hwc_wq; ++ return 0; ++out: ++ if (err) ++ mana_hwc_destroy_wq(hwc, hwc_wq); ++ return err; ++} ++ ++static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq, ++ struct hwc_work_request *req, ++ u32 dest_virt_rq_id, u32 dest_virt_rcq_id, ++ bool dest_pf) ++{ ++ struct device *dev = hwc_txq->hwc->dev; ++ struct hwc_tx_oob *tx_oob; ++ struct gdma_sge *sge; ++ int err; ++ ++ if (req->msg_size == 0 || req->msg_size > req->buf_len) { ++ dev_err(dev, "wrong msg_size: %u, buf_len: %u\n", ++ req->msg_size, req->buf_len); ++ return -EINVAL; ++ } ++ ++ tx_oob = &req->tx_oob; ++ ++ tx_oob->vrq_id = dest_virt_rq_id; ++ tx_oob->dest_vfid = 0; ++ tx_oob->vrcq_id = dest_virt_rcq_id; ++ tx_oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id; ++ tx_oob->loopback = false; ++ tx_oob->lso_override = false; ++ tx_oob->dest_pf = dest_pf; ++ tx_oob->vsq_id = hwc_txq->gdma_wq->id; ++ ++ sge = &req->sge; ++ sge->address = (u64)req->buf_sge_addr; ++ sge->mem_key = hwc_txq->msg_buf->gpa_mkey; ++ sge->size = req->msg_size; ++ ++ memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); ++ req->wqe_req.sgl = sge; ++ req->wqe_req.num_sge = 1; ++ req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob); ++ req->wqe_req.inline_oob_data = tx_oob; ++ req->wqe_req.client_data_unit = 0; ++ ++ err = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL); ++ if (err) ++ dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", err); ++ return err; ++} ++ ++static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc, ++ u16 num_msg) ++{ ++ int err; ++ ++ sema_init(&hwc->sema, num_msg); ++ ++ err = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res); ++ if (err) ++ dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", err); ++ return err; ++} ++ ++static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth, ++ u32 max_req_msg_size, u32 max_resp_msg_size) ++{ ++ struct gdma_context *gc = hwc->gdma_dev->gdma_context; ++ struct hwc_wq *hwc_rxq = hwc->rxq; ++ struct hwc_work_request *req; ++ struct hwc_caller_ctx *ctx; ++ int err; ++ int i; ++ ++ /* Post all WQEs on the RQ */ ++ for (i = 0; i < q_depth; i++) { ++ req = &hwc_rxq->msg_buf->reqs[i]; ++ err = mana_hwc_post_rx_wqe(hwc_rxq, req); ++ if (err) ++ return err; ++ } ++ ++ ctx = kzalloc(q_depth * sizeof(struct hwc_caller_ctx), GFP_KERNEL); ++ if (!ctx) ++ return -ENOMEM; ++ ++ for (i = 0; i < q_depth; ++i) ++ init_completion(&ctx[i].comp_event); ++ ++ hwc->caller_ctx = ctx; ++ ++ return mana_gd_test_eq(gc, hwc->cq->gdma_eq); ++} ++ ++static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, ++ u32 *max_req_msg_size, ++ u32 *max_resp_msg_size) ++{ ++ struct hw_channel_context *hwc = gc->hwc.driver_data; ++ struct gdma_queue *rq = hwc->rxq->gdma_wq; ++ struct gdma_queue *sq = hwc->txq->gdma_wq; ++ struct gdma_queue *eq = hwc->cq->gdma_eq; ++ struct gdma_queue *cq = hwc->cq->gdma_cq; ++ int err; ++ ++ init_completion(&hwc->hwc_init_eqe_comp); ++ ++ err = mana_smc_setup_hwc(&gc->shm_channel, false, ++ eq->mem_info.dma_handle, ++ cq->mem_info.dma_handle, ++ rq->mem_info.dma_handle, ++ sq->mem_info.dma_handle, ++ eq->eq.msix_index); ++ if (err) ++ return err; ++ ++ if (!wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * HZ)) ++ return -ETIMEDOUT; ++ ++ *q_depth = hwc->hwc_init_q_depth_max; ++ *max_req_msg_size = hwc->hwc_init_max_req_msg_size; ++ 
*max_resp_msg_size = hwc->hwc_init_max_resp_msg_size; ++ ++ if (WARN_ON(cq->id >= gc->max_num_cqs)) ++ return -EPROTO; ++ ++ gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *)); ++ if (!gc->cq_table) ++ return -ENOMEM; ++ ++ gc->cq_table[cq->id] = cq; ++ ++ return 0; ++} ++ ++static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth, ++ u32 max_req_msg_size, u32 max_resp_msg_size) ++{ ++ struct hwc_wq *hwc_rxq = NULL; ++ struct hwc_wq *hwc_txq = NULL; ++ struct hwc_cq *hwc_cq = NULL; ++ int err; ++ ++ err = mana_hwc_init_inflight_msg(hwc, q_depth); ++ if (err) ++ return err; ++ ++ /* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ ++ * queue depth and RQ queue depth. ++ */ ++ err = mana_hwc_create_cq(hwc, q_depth * 2, ++ mana_hwc_init_event_handler, hwc, ++ mana_hwc_rx_event_handler, hwc, ++ mana_hwc_tx_event_handler, hwc, &hwc_cq); ++ if (err) { ++ dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err); ++ goto out; ++ } ++ hwc->cq = hwc_cq; ++ ++ err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size, ++ hwc_cq, &hwc_rxq); ++ if (err) { ++ dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err); ++ goto out; ++ } ++ hwc->rxq = hwc_rxq; ++ ++ err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size, ++ hwc_cq, &hwc_txq); ++ if (err) { ++ dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err); ++ goto out; ++ } ++ hwc->txq = hwc_txq; ++ ++ hwc->num_inflight_msg = q_depth; ++ hwc->max_req_msg_size = max_req_msg_size; ++ ++ return 0; ++out: ++ if (hwc_txq) ++ mana_hwc_destroy_wq(hwc, hwc_txq); ++ ++ if (hwc_rxq) ++ mana_hwc_destroy_wq(hwc, hwc_rxq); ++ ++ if (hwc_cq) ++ mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); ++ ++ mana_gd_free_res_map(&hwc->inflight_msg_res); ++ return err; ++} ++ ++int mana_hwc_create_channel(struct gdma_context *gc) ++{ ++ u32 max_req_msg_size, max_resp_msg_size; ++ struct gdma_dev *gd = &gc->hwc; ++ struct hw_channel_context *hwc; ++ u16 q_depth_max; ++ int err; ++ ++ hwc = kzalloc(sizeof(*hwc), GFP_KERNEL); ++ if (!hwc) ++ return -ENOMEM; ++ ++ gd->gdma_context = gc; ++ gd->driver_data = hwc; ++ hwc->gdma_dev = gd; ++ hwc->dev = gc->dev; ++ ++ /* HWC's instance number is always 0. 
*/ ++ gd->dev_id.as_uint32 = 0; ++ gd->dev_id.type = GDMA_DEVICE_HWC; ++ ++ gd->pdid = INVALID_PDID; ++ gd->doorbell = INVALID_DOORBELL; ++ ++ err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, ++ HW_CHANNEL_MAX_REQUEST_SIZE, ++ HW_CHANNEL_MAX_RESPONSE_SIZE); ++ if (err) { ++ dev_err(hwc->dev, "Failed to initialize HWC: %d\n", err); ++ goto out; ++ } ++ ++ err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size, ++ &max_resp_msg_size); ++ if (err) { ++ dev_err(hwc->dev, "Failed to establish HWC: %d\n", err); ++ goto out; ++ } ++ ++ err = mana_hwc_test_channel(gc->hwc.driver_data, ++ HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, ++ max_req_msg_size, max_resp_msg_size); ++ if (err) { ++ dev_err(hwc->dev, "Failed to test HWC: %d\n", err); ++ goto out; ++ } ++ ++ return 0; ++out: ++ kfree(hwc); ++ return err; ++} ++ ++void mana_hwc_destroy_channel(struct gdma_context *gc) ++{ ++ struct hw_channel_context *hwc = gc->hwc.driver_data; ++ struct hwc_caller_ctx *ctx; ++ ++ mana_smc_teardown_hwc(&gc->shm_channel, false); ++ ++ ctx = hwc->caller_ctx; ++ kfree(ctx); ++ hwc->caller_ctx = NULL; ++ ++ mana_hwc_destroy_wq(hwc, hwc->txq); ++ hwc->txq = NULL; ++ ++ mana_hwc_destroy_wq(hwc, hwc->rxq); ++ hwc->rxq = NULL; ++ ++ mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); ++ hwc->cq = NULL; ++ ++ mana_gd_free_res_map(&hwc->inflight_msg_res); ++ ++ hwc->num_inflight_msg = 0; ++ ++ if (hwc->gdma_dev->pdid != INVALID_PDID) { ++ hwc->gdma_dev->doorbell = INVALID_DOORBELL; ++ hwc->gdma_dev->pdid = INVALID_PDID; ++ } ++ ++ kfree(hwc); ++ gc->hwc.driver_data = NULL; ++ gc->hwc.gdma_context = NULL; ++} ++ ++int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, ++ const void *req, u32 resp_len, void *resp) ++{ ++ struct hwc_work_request *tx_wr; ++ struct hwc_wq *txq = hwc->txq; ++ struct gdma_req_hdr *req_msg; ++ struct hwc_caller_ctx *ctx; ++ u16 msg_id; ++ int err; ++ ++ mana_hwc_get_msg_index(hwc, &msg_id); ++ ++ tx_wr = &txq->msg_buf->reqs[msg_id]; ++ ++ if (req_len > tx_wr->buf_len) { ++ dev_err(hwc->dev, "HWC: req msg size: %d > %d\n", req_len, ++ tx_wr->buf_len); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ ctx = hwc->caller_ctx + msg_id; ++ ctx->output_buf = resp; ++ ctx->output_buflen = resp_len; ++ ++ req_msg = (struct gdma_req_hdr *)tx_wr->buf_va; ++ if (req) ++ memcpy(req_msg, req, req_len); ++ ++ req_msg->req.hwc_msg_id = msg_id; ++ ++ tx_wr->msg_size = req_len; ++ ++ err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false); ++ if (err) { ++ dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err); ++ goto out; ++ } ++ ++ if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) { ++ dev_err(hwc->dev, "HWC: Request timed out!\n"); ++ err = -ETIMEDOUT; ++ goto out; ++ } ++ ++ if (ctx->error) { ++ err = ctx->error; ++ goto out; ++ } ++ ++ if (ctx->status_code) { ++ dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", ++ ctx->status_code); ++ err = -EPROTO; ++ goto out; ++ } ++out: ++ mana_hwc_put_msg_index(hwc, msg_id); ++ return err; ++} +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/drivers/net/ethernet/microsoft/mana/hw_channel.h +new file mode 100644 +index 000000000000..31c6e83c454a +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.h +@@ -0,0 +1,190 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ ++/* Copyright (c) 2021, Microsoft Corporation. 
*/ ++ ++#ifndef _HW_CHANNEL_H ++#define _HW_CHANNEL_H ++ ++#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4 ++ ++#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000 ++#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000 ++ ++#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1 ++ ++#define HWC_INIT_DATA_CQID 1 ++#define HWC_INIT_DATA_RQID 2 ++#define HWC_INIT_DATA_SQID 3 ++#define HWC_INIT_DATA_QUEUE_DEPTH 4 ++#define HWC_INIT_DATA_MAX_REQUEST 5 ++#define HWC_INIT_DATA_MAX_RESPONSE 6 ++#define HWC_INIT_DATA_MAX_NUM_CQS 7 ++#define HWC_INIT_DATA_PDID 8 ++#define HWC_INIT_DATA_GPA_MKEY 9 ++ ++/* Structures labeled with "HW DATA" are exchanged with the hardware. All of ++ * them are naturally aligned and hence don't need __packed. ++ */ ++ ++union hwc_init_eq_id_db { ++ u32 as_uint32; ++ ++ struct { ++ u32 eq_id : 16; ++ u32 doorbell : 16; ++ }; ++}; /* HW DATA */ ++ ++union hwc_init_type_data { ++ u32 as_uint32; ++ ++ struct { ++ u32 value : 24; ++ u32 type : 8; ++ }; ++}; /* HW DATA */ ++ ++struct hwc_rx_oob { ++ u32 type : 6; ++ u32 eom : 1; ++ u32 som : 1; ++ u32 vendor_err : 8; ++ u32 reserved1 : 16; ++ ++ u32 src_virt_wq : 24; ++ u32 src_vfid : 8; ++ ++ u32 reserved2; ++ ++ union { ++ u32 wqe_addr_low; ++ u32 wqe_offset; ++ }; ++ ++ u32 wqe_addr_high; ++ ++ u32 client_data_unit : 14; ++ u32 reserved3 : 18; ++ ++ u32 tx_oob_data_size; ++ ++ u32 chunk_offset : 21; ++ u32 reserved4 : 11; ++}; /* HW DATA */ ++ ++struct hwc_tx_oob { ++ u32 reserved1; ++ ++ u32 reserved2; ++ ++ u32 vrq_id : 24; ++ u32 dest_vfid : 8; ++ ++ u32 vrcq_id : 24; ++ u32 reserved3 : 8; ++ ++ u32 vscq_id : 24; ++ u32 loopback : 1; ++ u32 lso_override: 1; ++ u32 dest_pf : 1; ++ u32 reserved4 : 5; ++ ++ u32 vsq_id : 24; ++ u32 reserved5 : 8; ++}; /* HW DATA */ ++ ++struct hwc_work_request { ++ void *buf_va; ++ void *buf_sge_addr; ++ u32 buf_len; ++ u32 msg_size; ++ ++ struct gdma_wqe_request wqe_req; ++ struct hwc_tx_oob tx_oob; ++ ++ struct gdma_sge sge; ++}; ++ ++/* hwc_dma_buf represents the array of in-flight WQEs. ++ * mem_info as know as the GDMA mapped memory is partitioned and used by ++ * in-flight WQEs. ++ * The number of WQEs is determined by the number of in-flight messages. 
++ */ ++struct hwc_dma_buf { ++ struct gdma_mem_info mem_info; ++ ++ u32 gpa_mkey; ++ ++ u32 num_reqs; ++ struct hwc_work_request reqs[]; ++}; ++ ++typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, ++ const struct hwc_rx_oob *rx_oob); ++ ++typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id, ++ const struct hwc_rx_oob *rx_oob); ++ ++struct hwc_cq { ++ struct hw_channel_context *hwc; ++ ++ struct gdma_queue *gdma_cq; ++ struct gdma_queue *gdma_eq; ++ struct gdma_comp *comp_buf; ++ u16 queue_depth; ++ ++ hwc_rx_event_handler_t *rx_event_handler; ++ void *rx_event_ctx; ++ ++ hwc_tx_event_handler_t *tx_event_handler; ++ void *tx_event_ctx; ++}; ++ ++struct hwc_wq { ++ struct hw_channel_context *hwc; ++ ++ struct gdma_queue *gdma_wq; ++ struct hwc_dma_buf *msg_buf; ++ u16 queue_depth; ++ ++ struct hwc_cq *hwc_cq; ++}; ++ ++struct hwc_caller_ctx { ++ struct completion comp_event; ++ void *output_buf; ++ u32 output_buflen; ++ ++ u32 error; /* Linux error code */ ++ u32 status_code; ++}; ++ ++struct hw_channel_context { ++ struct gdma_dev *gdma_dev; ++ struct device *dev; ++ ++ u16 num_inflight_msg; ++ u32 max_req_msg_size; ++ ++ u16 hwc_init_q_depth_max; ++ u32 hwc_init_max_req_msg_size; ++ u32 hwc_init_max_resp_msg_size; ++ ++ struct completion hwc_init_eqe_comp; ++ ++ struct hwc_wq *rxq; ++ struct hwc_wq *txq; ++ struct hwc_cq *cq; ++ ++ struct semaphore sema; ++ struct gdma_resource inflight_msg_res; ++ ++ struct hwc_caller_ctx *caller_ctx; ++}; ++ ++int mana_hwc_create_channel(struct gdma_context *gc); ++void mana_hwc_destroy_channel(struct gdma_context *gc); ++ ++int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, ++ const void *req, u32 resp_len, void *resp); ++ ++#endif /* _HW_CHANNEL_H */ +diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h +new file mode 100644 +index 000000000000..a2c3f826f022 +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/mana.h +@@ -0,0 +1,533 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ ++/* Copyright (c) 2021, Microsoft Corporation. */ ++ ++#ifndef _MANA_H ++#define _MANA_H ++ ++#include "gdma.h" ++#include "hw_channel.h" ++ ++/* Microsoft Azure Network Adapter (MANA)'s definitions ++ * ++ * Structures labeled with "HW DATA" are exchanged with the hardware. All of ++ * them are naturally aligned and hence don't need __packed. 
++ */ ++ ++/* MANA protocol version */ ++#define MANA_MAJOR_VERSION 0 ++#define MANA_MINOR_VERSION 1 ++#define MANA_MICRO_VERSION 1 ++ ++typedef u64 mana_handle_t; ++#define INVALID_MANA_HANDLE ((mana_handle_t)-1) ++ ++enum TRI_STATE { ++ TRI_STATE_UNKNOWN = -1, ++ TRI_STATE_FALSE = 0, ++ TRI_STATE_TRUE = 1 ++}; ++ ++/* Number of entries for hardware indirection table must be in power of 2 */ ++#define MANA_INDIRECT_TABLE_SIZE 64 ++#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) ++ ++/* The Toeplitz hash key's length in bytes: should be multiple of 8 */ ++#define MANA_HASH_KEY_SIZE 40 ++ ++#define COMP_ENTRY_SIZE 64 ++ ++#define ADAPTER_MTU_SIZE 1500 ++#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) ++ ++#define RX_BUFFERS_PER_QUEUE 512 ++ ++#define MAX_SEND_BUFFERS_PER_QUEUE 256 ++ ++#define EQ_SIZE (8 * PAGE_SIZE) ++#define LOG2_EQ_THROTTLE 3 ++ ++#define MAX_PORTS_IN_MANA_DEV 16 ++ ++struct mana_stats { ++ u64 packets; ++ u64 bytes; ++ struct u64_stats_sync syncp; ++}; ++ ++struct mana_txq { ++ struct gdma_queue *gdma_sq; ++ ++ union { ++ u32 gdma_txq_id; ++ struct { ++ u32 reserved1 : 10; ++ u32 vsq_frame : 14; ++ u32 reserved2 : 8; ++ }; ++ }; ++ ++ u16 vp_offset; ++ ++ struct net_device *ndev; ++ ++ /* The SKBs are sent to the HW and we are waiting for the CQEs. */ ++ struct sk_buff_head pending_skbs; ++ struct netdev_queue *net_txq; ++ ++ atomic_t pending_sends; ++ ++ struct mana_stats stats; ++}; ++ ++/* skb data and frags dma mappings */ ++struct mana_skb_head { ++ dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; ++ ++ u32 size[MAX_SKB_FRAGS + 1]; ++}; ++ ++#define MANA_HEADROOM sizeof(struct mana_skb_head) ++ ++enum mana_tx_pkt_format { ++ MANA_SHORT_PKT_FMT = 0, ++ MANA_LONG_PKT_FMT = 1, ++}; ++ ++struct mana_tx_short_oob { ++ u32 pkt_fmt : 2; ++ u32 is_outer_ipv4 : 1; ++ u32 is_outer_ipv6 : 1; ++ u32 comp_iphdr_csum : 1; ++ u32 comp_tcp_csum : 1; ++ u32 comp_udp_csum : 1; ++ u32 supress_txcqe_gen : 1; ++ u32 vcq_num : 24; ++ ++ u32 trans_off : 10; /* Transport header offset */ ++ u32 vsq_frame : 14; ++ u32 short_vp_offset : 8; ++}; /* HW DATA */ ++ ++struct mana_tx_long_oob { ++ u32 is_encap : 1; ++ u32 inner_is_ipv6 : 1; ++ u32 inner_tcp_opt : 1; ++ u32 inject_vlan_pri_tag : 1; ++ u32 reserved1 : 12; ++ u32 pcp : 3; /* 802.1Q */ ++ u32 dei : 1; /* 802.1Q */ ++ u32 vlan_id : 12; /* 802.1Q */ ++ ++ u32 inner_frame_offset : 10; ++ u32 inner_ip_rel_offset : 6; ++ u32 long_vp_offset : 12; ++ u32 reserved2 : 4; ++ ++ u32 reserved3; ++ u32 reserved4; ++}; /* HW DATA */ ++ ++struct mana_tx_oob { ++ struct mana_tx_short_oob s_oob; ++ struct mana_tx_long_oob l_oob; ++}; /* HW DATA */ ++ ++enum mana_cq_type { ++ MANA_CQ_TYPE_RX, ++ MANA_CQ_TYPE_TX, ++}; ++ ++enum mana_cqe_type { ++ CQE_INVALID = 0, ++ CQE_RX_OKAY = 1, ++ CQE_RX_COALESCED_4 = 2, ++ CQE_RX_OBJECT_FENCE = 3, ++ CQE_RX_TRUNCATED = 4, ++ ++ CQE_TX_OKAY = 32, ++ CQE_TX_SA_DROP = 33, ++ CQE_TX_MTU_DROP = 34, ++ CQE_TX_INVALID_OOB = 35, ++ CQE_TX_INVALID_ETH_TYPE = 36, ++ CQE_TX_HDR_PROCESSING_ERROR = 37, ++ CQE_TX_VF_DISABLED = 38, ++ CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, ++ CQE_TX_VPORT_DISABLED = 40, ++ CQE_TX_VLAN_TAGGING_VIOLATION = 41, ++}; ++ ++#define MANA_CQE_COMPLETION 1 ++ ++struct mana_cqe_header { ++ u32 cqe_type : 6; ++ u32 client_type : 2; ++ u32 vendor_err : 24; ++}; /* HW DATA */ ++ ++/* NDIS HASH Types */ ++#define NDIS_HASH_IPV4 BIT(0) ++#define NDIS_HASH_TCP_IPV4 BIT(1) ++#define NDIS_HASH_UDP_IPV4 BIT(2) ++#define NDIS_HASH_IPV6 BIT(3) ++#define NDIS_HASH_TCP_IPV6 BIT(4) ++#define NDIS_HASH_UDP_IPV6 
BIT(5) ++#define NDIS_HASH_IPV6_EX BIT(6) ++#define NDIS_HASH_TCP_IPV6_EX BIT(7) ++#define NDIS_HASH_UDP_IPV6_EX BIT(8) ++ ++#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX) ++#define MANA_HASH_L4 \ ++ (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \ ++ NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX) ++ ++struct mana_rxcomp_perpkt_info { ++ u32 pkt_len : 16; ++ u32 reserved1 : 16; ++ u32 reserved2; ++ u32 pkt_hash; ++}; /* HW DATA */ ++ ++#define MANA_RXCOMP_OOB_NUM_PPI 4 ++ ++/* Receive completion OOB */ ++struct mana_rxcomp_oob { ++ struct mana_cqe_header cqe_hdr; ++ ++ u32 rx_vlan_id : 12; ++ u32 rx_vlantag_present : 1; ++ u32 rx_outer_iphdr_csum_succeed : 1; ++ u32 rx_outer_iphdr_csum_fail : 1; ++ u32 reserved1 : 1; ++ u32 rx_hashtype : 9; ++ u32 rx_iphdr_csum_succeed : 1; ++ u32 rx_iphdr_csum_fail : 1; ++ u32 rx_tcp_csum_succeed : 1; ++ u32 rx_tcp_csum_fail : 1; ++ u32 rx_udp_csum_succeed : 1; ++ u32 rx_udp_csum_fail : 1; ++ u32 reserved2 : 1; ++ ++ struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI]; ++ ++ u32 rx_wqe_offset; ++}; /* HW DATA */ ++ ++struct mana_tx_comp_oob { ++ struct mana_cqe_header cqe_hdr; ++ ++ u32 tx_data_offset; ++ ++ u32 tx_sgl_offset : 5; ++ u32 tx_wqe_offset : 27; ++ ++ u32 reserved[12]; ++}; /* HW DATA */ ++ ++struct mana_rxq; ++ ++struct mana_cq { ++ struct gdma_queue *gdma_cq; ++ ++ /* Cache the CQ id (used to verify if each CQE comes to the right CQ. */ ++ u32 gdma_id; ++ ++ /* Type of the CQ: TX or RX */ ++ enum mana_cq_type type; ++ ++ /* Pointer to the mana_rxq that is pushing RX CQEs to the queue. ++ * Only and must be non-NULL if type is MANA_CQ_TYPE_RX. ++ */ ++ struct mana_rxq *rxq; ++ ++ /* Pointer to the mana_txq that is pushing TX CQEs to the queue. ++ * Only and must be non-NULL if type is MANA_CQ_TYPE_TX. ++ */ ++ struct mana_txq *txq; ++ ++ /* Pointer to a buffer which the CQ handler can copy the CQE's into. */ ++ struct gdma_comp *gdma_comp_buf; ++}; ++ ++#define GDMA_MAX_RQE_SGES 15 ++ ++struct mana_recv_buf_oob { ++ /* A valid GDMA work request representing the data buffer. */ ++ struct gdma_wqe_request wqe_req; ++ ++ void *buf_va; ++ dma_addr_t buf_dma_addr; ++ ++ /* SGL of the buffer going to be sent has part of the work request. */ ++ u32 num_sge; ++ struct gdma_sge sgl[GDMA_MAX_RQE_SGES]; ++ ++ /* Required to store the result of mana_gd_post_work_request. ++ * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the ++ * work queue when the WQE is consumed. ++ */ ++ struct gdma_posted_wqe_info wqe_inf; ++}; ++ ++struct mana_rxq { ++ struct gdma_queue *gdma_rq; ++ /* Cache the gdma receive queue id */ ++ u32 gdma_id; ++ ++ /* Index of RQ in the vPort, not gdma receive queue id */ ++ u32 rxq_idx; ++ ++ u32 datasize; ++ ++ mana_handle_t rxobj; ++ ++ struct mana_cq rx_cq; ++ ++ struct net_device *ndev; ++ ++ /* Total number of receive buffers to be allocated */ ++ u32 num_rx_buf; ++ ++ u32 buf_index; ++ ++ struct mana_stats stats; ++ ++ /* MUST BE THE LAST MEMBER: ++ * Each receive buffer has an associated mana_recv_buf_oob. 
++ */ ++ struct mana_recv_buf_oob rx_oobs[]; ++}; ++ ++struct mana_tx_qp { ++ struct mana_txq txq; ++ ++ struct mana_cq tx_cq; ++ ++ mana_handle_t tx_object; ++}; ++ ++struct mana_ethtool_stats { ++ u64 stop_queue; ++ u64 wake_queue; ++}; ++ ++struct mana_context { ++ struct gdma_dev *gdma_dev; ++ ++ u16 num_ports; ++ ++ struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; ++}; ++ ++struct mana_port_context { ++ struct mana_context *ac; ++ struct net_device *ndev; ++ ++ u8 mac_addr[ETH_ALEN]; ++ ++ struct mana_eq *eqs; ++ ++ enum TRI_STATE rss_state; ++ ++ mana_handle_t default_rxobj; ++ bool tx_shortform_allowed; ++ u16 tx_vp_offset; ++ ++ struct mana_tx_qp *tx_qp; ++ ++ /* Indirection Table for RX & TX. The values are queue indexes */ ++ u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; ++ ++ /* Indirection table containing RxObject Handles */ ++ mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; ++ ++ /* Hash key used by the NIC */ ++ u8 hashkey[MANA_HASH_KEY_SIZE]; ++ ++ /* This points to an array of num_queues of RQ pointers. */ ++ struct mana_rxq **rxqs; ++ ++ /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */ ++ unsigned int max_queues; ++ unsigned int num_queues; ++ ++ mana_handle_t port_handle; ++ ++ u16 port_idx; ++ ++ bool port_is_up; ++ bool port_st_save; /* Saved port state */ ++ ++ struct mana_ethtool_stats eth_stats; ++}; ++ ++int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, ++ bool update_hash, bool update_tab); ++ ++int mana_alloc_queues(struct net_device *ndev); ++int mana_attach(struct net_device *ndev); ++int mana_detach(struct net_device *ndev, bool from_close); ++ ++int mana_probe(struct gdma_dev *gd); ++void mana_remove(struct gdma_dev *gd); ++ ++extern const struct ethtool_ops mana_ethtool_ops; ++ ++struct mana_obj_spec { ++ u32 queue_index; ++ u64 gdma_region; ++ u32 queue_size; ++ u32 attached_eq; ++ u32 modr_ctx_id; ++}; ++ ++enum mana_command_code { ++ MANA_QUERY_DEV_CONFIG = 0x20001, ++ MANA_QUERY_GF_STAT = 0x20002, ++ MANA_CONFIG_VPORT_TX = 0x20003, ++ MANA_CREATE_WQ_OBJ = 0x20004, ++ MANA_DESTROY_WQ_OBJ = 0x20005, ++ MANA_FENCE_RQ = 0x20006, ++ MANA_CONFIG_VPORT_RX = 0x20007, ++ MANA_QUERY_VPORT_CONFIG = 0x20008, ++}; ++ ++/* Query Device Configuration */ ++struct mana_query_device_cfg_req { ++ struct gdma_req_hdr hdr; ++ ++ /* Driver Capability flags */ ++ u64 drv_cap_flags1; ++ u64 drv_cap_flags2; ++ u64 drv_cap_flags3; ++ u64 drv_cap_flags4; ++ ++ u32 proto_major_ver; ++ u32 proto_minor_ver; ++ u32 proto_micro_ver; ++ ++ u32 reserved; ++}; /* HW DATA */ ++ ++struct mana_query_device_cfg_resp { ++ struct gdma_resp_hdr hdr; ++ ++ u64 pf_cap_flags1; ++ u64 pf_cap_flags2; ++ u64 pf_cap_flags3; ++ u64 pf_cap_flags4; ++ ++ u16 max_num_vports; ++ u16 reserved; ++ u32 max_num_eqs; ++}; /* HW DATA */ ++ ++/* Query vPort Configuration */ ++struct mana_query_vport_cfg_req { ++ struct gdma_req_hdr hdr; ++ u32 vport_index; ++}; /* HW DATA */ ++ ++struct mana_query_vport_cfg_resp { ++ struct gdma_resp_hdr hdr; ++ u32 max_num_sq; ++ u32 max_num_rq; ++ u32 num_indirection_ent; ++ u32 reserved1; ++ u8 mac_addr[6]; ++ u8 reserved2[2]; ++ mana_handle_t vport; ++}; /* HW DATA */ ++ ++/* Configure vPort */ ++struct mana_config_vport_req { ++ struct gdma_req_hdr hdr; ++ mana_handle_t vport; ++ u32 pdid; ++ u32 doorbell_pageid; ++}; /* HW DATA */ ++ ++struct mana_config_vport_resp { ++ struct gdma_resp_hdr hdr; ++ u16 tx_vport_offset; ++ u8 short_form_allowed; ++ u8 reserved; ++}; /* HW DATA */ ++ ++/* Create WQ Object */ ++struct 
mana_create_wqobj_req { ++ struct gdma_req_hdr hdr; ++ mana_handle_t vport; ++ u32 wq_type; ++ u32 reserved; ++ u64 wq_gdma_region; ++ u64 cq_gdma_region; ++ u32 wq_size; ++ u32 cq_size; ++ u32 cq_moderation_ctx_id; ++ u32 cq_parent_qid; ++}; /* HW DATA */ ++ ++struct mana_create_wqobj_resp { ++ struct gdma_resp_hdr hdr; ++ u32 wq_id; ++ u32 cq_id; ++ mana_handle_t wq_obj; ++}; /* HW DATA */ ++ ++/* Destroy WQ Object */ ++struct mana_destroy_wqobj_req { ++ struct gdma_req_hdr hdr; ++ u32 wq_type; ++ u32 reserved; ++ mana_handle_t wq_obj_handle; ++}; /* HW DATA */ ++ ++struct mana_destroy_wqobj_resp { ++ struct gdma_resp_hdr hdr; ++}; /* HW DATA */ ++ ++/* Fence RQ */ ++struct mana_fence_rq_req { ++ struct gdma_req_hdr hdr; ++ mana_handle_t wq_obj_handle; ++}; /* HW DATA */ ++ ++struct mana_fence_rq_resp { ++ struct gdma_resp_hdr hdr; ++}; /* HW DATA */ ++ ++/* Configure vPort Rx Steering */ ++struct mana_cfg_rx_steer_req { ++ struct gdma_req_hdr hdr; ++ mana_handle_t vport; ++ u16 num_indir_entries; ++ u16 indir_tab_offset; ++ u32 rx_enable; ++ u32 rss_enable; ++ u8 update_default_rxobj; ++ u8 update_hashkey; ++ u8 update_indir_tab; ++ u8 reserved; ++ mana_handle_t default_rxobj; ++ u8 hashkey[MANA_HASH_KEY_SIZE]; ++}; /* HW DATA */ ++ ++struct mana_cfg_rx_steer_resp { ++ struct gdma_resp_hdr hdr; ++}; /* HW DATA */ ++ ++#define MANA_MAX_NUM_QUEUES 16 ++ ++#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) ++ ++struct mana_tx_package { ++ struct gdma_wqe_request wqe_req; ++ struct gdma_sge sgl_array[5]; ++ struct gdma_sge *sgl_ptr; ++ ++ struct mana_tx_oob tx_oob; ++ ++ struct gdma_posted_wqe_info wqe_info; ++}; ++ ++#endif /* _MANA_H */ +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +new file mode 100644 +index 000000000000..a744ca0b6c19 +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -0,0 +1,1895 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++/* Copyright (c) 2021, Microsoft Corporation. 
*/ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "mana.h" ++ ++/* Microsoft Azure Network Adapter (MANA) functions */ ++ ++static int mana_open(struct net_device *ndev) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ int err; ++ ++ err = mana_alloc_queues(ndev); ++ if (err) ++ return err; ++ ++ apc->port_is_up = true; ++ ++ /* Ensure port state updated before txq state */ ++ smp_wmb(); ++ ++ netif_carrier_on(ndev); ++ netif_tx_wake_all_queues(ndev); ++ ++ return 0; ++} ++ ++static int mana_close(struct net_device *ndev) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ ++ if (!apc->port_is_up) ++ return 0; ++ ++ return mana_detach(ndev, true); ++} ++ ++static bool mana_can_tx(struct gdma_queue *wq) ++{ ++ return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; ++} ++ ++static unsigned int mana_checksum_info(struct sk_buff *skb) ++{ ++ if (skb->protocol == htons(ETH_P_IP)) { ++ struct iphdr *ip = ip_hdr(skb); ++ ++ if (ip->protocol == IPPROTO_TCP) ++ return IPPROTO_TCP; ++ ++ if (ip->protocol == IPPROTO_UDP) ++ return IPPROTO_UDP; ++ } else if (skb->protocol == htons(ETH_P_IPV6)) { ++ struct ipv6hdr *ip6 = ipv6_hdr(skb); ++ ++ if (ip6->nexthdr == IPPROTO_TCP) ++ return IPPROTO_TCP; ++ ++ if (ip6->nexthdr == IPPROTO_UDP) ++ return IPPROTO_UDP; ++ } ++ ++ /* No csum offloading */ ++ return 0; ++} ++ ++static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, ++ struct mana_tx_package *tp) ++{ ++ struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ struct gdma_context *gc; ++ struct device *dev; ++ skb_frag_t *frag; ++ dma_addr_t da; ++ int i; ++ ++ gc = gd->gdma_context; ++ dev = gc->dev; ++ da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); ++ ++ if (dma_mapping_error(dev, da)) ++ return -ENOMEM; ++ ++ ash->dma_handle[0] = da; ++ ash->size[0] = skb_headlen(skb); ++ ++ tp->wqe_req.sgl[0].address = ash->dma_handle[0]; ++ tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey; ++ tp->wqe_req.sgl[0].size = ash->size[0]; ++ ++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { ++ frag = &skb_shinfo(skb)->frags[i]; ++ da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), ++ DMA_TO_DEVICE); ++ ++ if (dma_mapping_error(dev, da)) ++ goto frag_err; ++ ++ ash->dma_handle[i + 1] = da; ++ ash->size[i + 1] = skb_frag_size(frag); ++ ++ tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1]; ++ tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey; ++ tp->wqe_req.sgl[i + 1].size = ash->size[i + 1]; ++ } ++ ++ return 0; ++ ++frag_err: ++ for (i = i - 1; i >= 0; i--) ++ dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1], ++ DMA_TO_DEVICE); ++ ++ dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); ++ ++ return -ENOMEM; ++} ++ ++static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ++{ ++ enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; ++ struct mana_port_context *apc = netdev_priv(ndev); ++ u16 txq_idx = skb_get_queue_mapping(skb); ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ bool ipv4 = false, ipv6 = false; ++ struct mana_tx_package pkg = {}; ++ struct netdev_queue *net_txq; ++ struct mana_stats *tx_stats; ++ struct gdma_queue *gdma_sq; ++ unsigned int csum_type; ++ struct mana_txq *txq; ++ struct mana_cq *cq; ++ int err, len; ++ ++ if (unlikely(!apc->port_is_up)) ++ goto tx_drop; ++ ++ if (skb_cow_head(skb, MANA_HEADROOM)) ++ goto tx_drop_count; ++ ++ txq = &apc->tx_qp[txq_idx].txq; ++ gdma_sq = txq->gdma_sq; ++ cq = 
&apc->tx_qp[txq_idx].tx_cq; ++ ++ pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; ++ pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; ++ ++ if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { ++ pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; ++ pkt_fmt = MANA_LONG_PKT_FMT; ++ } else { ++ pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; ++ } ++ ++ pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; ++ ++ if (pkt_fmt == MANA_SHORT_PKT_FMT) ++ pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); ++ else ++ pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); ++ ++ pkg.wqe_req.inline_oob_data = &pkg.tx_oob; ++ pkg.wqe_req.flags = 0; ++ pkg.wqe_req.client_data_unit = 0; ++ ++ pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; ++ WARN_ON_ONCE(pkg.wqe_req.num_sge > 30); ++ ++ if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { ++ pkg.wqe_req.sgl = pkg.sgl_array; ++ } else { ++ pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, ++ sizeof(struct gdma_sge), ++ GFP_ATOMIC); ++ if (!pkg.sgl_ptr) ++ goto tx_drop_count; ++ ++ pkg.wqe_req.sgl = pkg.sgl_ptr; ++ } ++ ++ if (skb->protocol == htons(ETH_P_IP)) ++ ipv4 = true; ++ else if (skb->protocol == htons(ETH_P_IPV6)) ++ ipv6 = true; ++ ++ if (skb_is_gso(skb)) { ++ pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; ++ pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; ++ ++ pkg.tx_oob.s_oob.comp_iphdr_csum = 1; ++ pkg.tx_oob.s_oob.comp_tcp_csum = 1; ++ pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); ++ ++ pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; ++ pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; ++ if (ipv4) { ++ ip_hdr(skb)->tot_len = 0; ++ ip_hdr(skb)->check = 0; ++ tcp_hdr(skb)->check = ++ ~csum_tcpudp_magic(ip_hdr(skb)->saddr, ++ ip_hdr(skb)->daddr, 0, ++ IPPROTO_TCP, 0); ++ } else { ++ ipv6_hdr(skb)->payload_len = 0; ++ tcp_hdr(skb)->check = ++ ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, ++ &ipv6_hdr(skb)->daddr, 0, ++ IPPROTO_TCP, 0); ++ } ++ } else if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ csum_type = mana_checksum_info(skb); ++ ++ if (csum_type == IPPROTO_TCP) { ++ pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; ++ pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; ++ ++ pkg.tx_oob.s_oob.comp_tcp_csum = 1; ++ pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); ++ ++ } else if (csum_type == IPPROTO_UDP) { ++ pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; ++ pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; ++ ++ pkg.tx_oob.s_oob.comp_udp_csum = 1; ++ } else { ++ /* Can't do offload of this type of checksum */ ++ if (skb_checksum_help(skb)) ++ goto free_sgl_ptr; ++ } ++ } ++ ++ if (mana_map_skb(skb, apc, &pkg)) ++ goto free_sgl_ptr; ++ ++ skb_queue_tail(&txq->pending_skbs, skb); ++ ++ len = skb->len; ++ net_txq = netdev_get_tx_queue(ndev, txq_idx); ++ ++ err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, ++ (struct gdma_posted_wqe_info *)skb->cb); ++ if (!mana_can_tx(gdma_sq)) { ++ netif_tx_stop_queue(net_txq); ++ apc->eth_stats.stop_queue++; ++ } ++ ++ if (err) { ++ (void)skb_dequeue_tail(&txq->pending_skbs); ++ netdev_warn(ndev, "Failed to post TX OOB: %d\n", err); ++ err = NETDEV_TX_BUSY; ++ goto tx_busy; ++ } ++ ++ err = NETDEV_TX_OK; ++ atomic_inc(&txq->pending_sends); ++ ++ mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); ++ ++ /* skb may be freed after mana_gd_post_work_request. Do not use it. 
*/ ++ skb = NULL; ++ ++ tx_stats = &txq->stats; ++ u64_stats_update_begin(&tx_stats->syncp); ++ tx_stats->packets++; ++ tx_stats->bytes += len; ++ u64_stats_update_end(&tx_stats->syncp); ++ ++tx_busy: ++ if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { ++ netif_tx_wake_queue(net_txq); ++ apc->eth_stats.wake_queue++; ++ } ++ ++ kfree(pkg.sgl_ptr); ++ return err; ++ ++free_sgl_ptr: ++ kfree(pkg.sgl_ptr); ++tx_drop_count: ++ ndev->stats.tx_dropped++; ++tx_drop: ++ dev_kfree_skb_any(skb); ++ return NETDEV_TX_OK; ++} ++ ++static void mana_get_stats64(struct net_device *ndev, ++ struct rtnl_link_stats64 *st) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ unsigned int num_queues = apc->num_queues; ++ struct mana_stats *stats; ++ unsigned int start; ++ u64 packets, bytes; ++ int q; ++ ++ if (!apc->port_is_up) ++ return; ++ ++ netdev_stats_to_stats64(st, &ndev->stats); ++ ++ for (q = 0; q < num_queues; q++) { ++ stats = &apc->rxqs[q]->stats; ++ ++ do { ++ start = u64_stats_fetch_begin_irq(&stats->syncp); ++ packets = stats->packets; ++ bytes = stats->bytes; ++ } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ ++ st->rx_packets += packets; ++ st->rx_bytes += bytes; ++ } ++ ++ for (q = 0; q < num_queues; q++) { ++ stats = &apc->tx_qp[q].txq.stats; ++ ++ do { ++ start = u64_stats_fetch_begin_irq(&stats->syncp); ++ packets = stats->packets; ++ bytes = stats->bytes; ++ } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ ++ st->tx_packets += packets; ++ st->tx_bytes += bytes; ++ } ++} ++ ++static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, ++ int old_q) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ u32 hash = skb_get_hash(skb); ++ struct sock *sk = skb->sk; ++ int txq; ++ ++ txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; ++ ++ if (txq != old_q && sk && sk_fullsock(sk) && ++ rcu_access_pointer(sk->sk_dst_cache)) ++ sk_tx_queue_set(sk, txq); ++ ++ return txq; ++} ++ ++static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, ++ struct net_device *sb_dev) ++{ ++ int txq; ++ ++ if (ndev->real_num_tx_queues == 1) ++ return 0; ++ ++ txq = sk_tx_queue_get(skb->sk); ++ ++ if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { ++ if (skb_rx_queue_recorded(skb)) ++ txq = skb_get_rx_queue(skb); ++ else ++ txq = mana_get_tx_queue(ndev, skb, txq); ++ } ++ ++ return txq; ++} ++ ++static const struct net_device_ops mana_devops = { ++ .ndo_open = mana_open, ++ .ndo_stop = mana_close, ++ .ndo_select_queue = mana_select_queue, ++ .ndo_start_xmit = mana_start_xmit, ++ .ndo_validate_addr = eth_validate_addr, ++ .ndo_get_stats64 = mana_get_stats64, ++}; ++ ++static void mana_cleanup_port_context(struct mana_port_context *apc) ++{ ++ kfree(apc->rxqs); ++ apc->rxqs = NULL; ++} ++ ++static int mana_init_port_context(struct mana_port_context *apc) ++{ ++ apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), ++ GFP_KERNEL); ++ ++ return !apc->rxqs ? 
-ENOMEM : 0; ++} ++ ++static int mana_send_request(struct mana_context *ac, void *in_buf, ++ u32 in_len, void *out_buf, u32 out_len) ++{ ++ struct gdma_context *gc = ac->gdma_dev->gdma_context; ++ struct gdma_resp_hdr *resp = out_buf; ++ struct gdma_req_hdr *req = in_buf; ++ struct device *dev = gc->dev; ++ static atomic_t activity_id; ++ int err; ++ ++ req->dev_id = gc->mana.dev_id; ++ req->activity_id = atomic_inc_return(&activity_id); ++ ++ err = mana_gd_send_request(gc, in_len, in_buf, out_len, ++ out_buf); ++ if (err || resp->status) { ++ dev_err(dev, "Failed to send mana message: %d, 0x%x\n", ++ err, resp->status); ++ return err ? err : -EPROTO; ++ } ++ ++ if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || ++ req->activity_id != resp->activity_id) { ++ dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", ++ req->dev_id.as_uint32, resp->dev_id.as_uint32, ++ req->activity_id, resp->activity_id); ++ return -EPROTO; ++ } ++ ++ return 0; ++} ++ ++static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, ++ const enum mana_command_code expected_code, ++ const u32 min_size) ++{ ++ if (resp_hdr->response.msg_type != expected_code) ++ return -EPROTO; ++ ++ if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) ++ return -EPROTO; ++ ++ if (resp_hdr->response.msg_size < min_size) ++ return -EPROTO; ++ ++ return 0; ++} ++ ++static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, ++ u32 proto_minor_ver, u32 proto_micro_ver, ++ u16 *max_num_vports) ++{ ++ struct gdma_context *gc = ac->gdma_dev->gdma_context; ++ struct mana_query_device_cfg_resp resp = {}; ++ struct mana_query_device_cfg_req req = {}; ++ struct device *dev = gc->dev; ++ int err = 0; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, ++ sizeof(req), sizeof(resp)); ++ req.proto_major_ver = proto_major_ver; ++ req.proto_minor_ver = proto_minor_ver; ++ req.proto_micro_ver = proto_micro_ver; ++ ++ err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); ++ if (err) { ++ dev_err(dev, "Failed to query config: %d", err); ++ return err; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, ++ sizeof(resp)); ++ if (err || resp.hdr.status) { ++ dev_err(dev, "Invalid query result: %d, 0x%x\n", err, ++ resp.hdr.status); ++ if (!err) ++ err = -EPROTO; ++ return err; ++ } ++ ++ *max_num_vports = resp.max_num_vports; ++ ++ return 0; ++} ++ ++static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, ++ u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) ++{ ++ struct mana_query_vport_cfg_resp resp = {}; ++ struct mana_query_vport_cfg_req req = {}; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, ++ sizeof(req), sizeof(resp)); ++ ++ req.vport_index = vport_index; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ if (err) ++ return err; ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, ++ sizeof(resp)); ++ if (err) ++ return err; ++ ++ if (resp.hdr.status) ++ return -EPROTO; ++ ++ *max_sq = resp.max_num_sq; ++ *max_rq = resp.max_num_rq; ++ *num_indir_entry = resp.num_indirection_ent; ++ ++ apc->port_handle = resp.vport; ++ ether_addr_copy(apc->mac_addr, resp.mac_addr); ++ ++ return 0; ++} ++ ++static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, ++ u32 doorbell_pg_id) ++{ ++ struct mana_config_vport_resp resp = {}; ++ struct mana_config_vport_req req = {}; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, ++ sizeof(req), 
sizeof(resp)); ++ req.vport = apc->port_handle; ++ req.pdid = protection_dom_id; ++ req.doorbell_pageid = doorbell_pg_id; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ if (err) { ++ netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err); ++ goto out; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX, ++ sizeof(resp)); ++ if (err || resp.hdr.status) { ++ netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n", ++ err, resp.hdr.status); ++ if (!err) ++ err = -EPROTO; ++ ++ goto out; ++ } ++ ++ apc->tx_shortform_allowed = resp.short_form_allowed; ++ apc->tx_vp_offset = resp.tx_vport_offset; ++out: ++ return err; ++} ++ ++static int mana_cfg_vport_steering(struct mana_port_context *apc, ++ enum TRI_STATE rx, ++ bool update_default_rxobj, bool update_key, ++ bool update_tab) ++{ ++ u16 num_entries = MANA_INDIRECT_TABLE_SIZE; ++ struct mana_cfg_rx_steer_req *req = NULL; ++ struct mana_cfg_rx_steer_resp resp = {}; ++ struct net_device *ndev = apc->ndev; ++ mana_handle_t *req_indir_tab; ++ u32 req_buf_size; ++ int err; ++ ++ req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; ++ req = kzalloc(req_buf_size, GFP_KERNEL); ++ if (!req) ++ return -ENOMEM; ++ ++ mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, ++ sizeof(resp)); ++ ++ req->vport = apc->port_handle; ++ req->num_indir_entries = num_entries; ++ req->indir_tab_offset = sizeof(*req); ++ req->rx_enable = rx; ++ req->rss_enable = apc->rss_state; ++ req->update_default_rxobj = update_default_rxobj; ++ req->update_hashkey = update_key; ++ req->update_indir_tab = update_tab; ++ req->default_rxobj = apc->default_rxobj; ++ ++ if (update_key) ++ memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); ++ ++ if (update_tab) { ++ req_indir_tab = (mana_handle_t *)(req + 1); ++ memcpy(req_indir_tab, apc->rxobj_table, ++ req->num_indir_entries * sizeof(mana_handle_t)); ++ } ++ ++ err = mana_send_request(apc->ac, req, req_buf_size, &resp, ++ sizeof(resp)); ++ if (err) { ++ netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); ++ goto out; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, ++ sizeof(resp)); ++ if (err) { ++ netdev_err(ndev, "vPort RX configuration failed: %d\n", err); ++ goto out; ++ } ++ ++ if (resp.hdr.status) { ++ netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", ++ resp.hdr.status); ++ err = -EPROTO; ++ } ++out: ++ kfree(req); ++ return err; ++} ++ ++static int mana_create_wq_obj(struct mana_port_context *apc, ++ mana_handle_t vport, ++ u32 wq_type, struct mana_obj_spec *wq_spec, ++ struct mana_obj_spec *cq_spec, ++ mana_handle_t *wq_obj) ++{ ++ struct mana_create_wqobj_resp resp = {}; ++ struct mana_create_wqobj_req req = {}; ++ struct net_device *ndev = apc->ndev; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, ++ sizeof(req), sizeof(resp)); ++ req.vport = vport; ++ req.wq_type = wq_type; ++ req.wq_gdma_region = wq_spec->gdma_region; ++ req.cq_gdma_region = cq_spec->gdma_region; ++ req.wq_size = wq_spec->queue_size; ++ req.cq_size = cq_spec->queue_size; ++ req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; ++ req.cq_parent_qid = cq_spec->attached_eq; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ if (err) { ++ netdev_err(ndev, "Failed to create WQ object: %d\n", err); ++ goto out; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, ++ sizeof(resp)); ++ if (err || resp.hdr.status) { ++ netdev_err(ndev, "Failed to create WQ 
object: %d, 0x%x\n", err, ++ resp.hdr.status); ++ if (!err) ++ err = -EPROTO; ++ goto out; ++ } ++ ++ if (resp.wq_obj == INVALID_MANA_HANDLE) { ++ netdev_err(ndev, "Got an invalid WQ object handle\n"); ++ err = -EPROTO; ++ goto out; ++ } ++ ++ *wq_obj = resp.wq_obj; ++ wq_spec->queue_index = resp.wq_id; ++ cq_spec->queue_index = resp.cq_id; ++ ++ return 0; ++out: ++ return err; ++} ++ ++static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, ++ mana_handle_t wq_obj) ++{ ++ struct mana_destroy_wqobj_resp resp = {}; ++ struct mana_destroy_wqobj_req req = {}; ++ struct net_device *ndev = apc->ndev; ++ int err; ++ ++ mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, ++ sizeof(req), sizeof(resp)); ++ req.wq_type = wq_type; ++ req.wq_obj_handle = wq_obj; ++ ++ err = mana_send_request(apc->ac, &req, sizeof(req), &resp, ++ sizeof(resp)); ++ if (err) { ++ netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); ++ return; ++ } ++ ++ err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, ++ sizeof(resp)); ++ if (err || resp.hdr.status) ++ netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, ++ resp.hdr.status); ++} ++ ++static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf) ++{ ++ int i; ++ ++ for (i = 0; i < CQE_POLLING_BUFFER; i++) ++ memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp)); ++} ++ ++static void mana_destroy_eq(struct gdma_context *gc, ++ struct mana_port_context *apc) ++{ ++ struct gdma_queue *eq; ++ int i; ++ ++ if (!apc->eqs) ++ return; ++ ++ for (i = 0; i < apc->num_queues; i++) { ++ eq = apc->eqs[i].eq; ++ if (!eq) ++ continue; ++ ++ mana_gd_destroy_queue(gc, eq); ++ } ++ ++ kfree(apc->eqs); ++ apc->eqs = NULL; ++} ++ ++static int mana_create_eq(struct mana_port_context *apc) ++{ ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ struct gdma_queue_spec spec = {}; ++ int err; ++ int i; ++ ++ apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq), ++ GFP_KERNEL); ++ if (!apc->eqs) ++ return -ENOMEM; ++ ++ spec.type = GDMA_EQ; ++ spec.monitor_avl_buf = false; ++ spec.queue_size = EQ_SIZE; ++ spec.eq.callback = NULL; ++ spec.eq.context = apc->eqs; ++ spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; ++ spec.eq.ndev = apc->ndev; ++ ++ for (i = 0; i < apc->num_queues; i++) { ++ mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll); ++ ++ err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq); ++ if (err) ++ goto out; ++ } ++ ++ return 0; ++out: ++ mana_destroy_eq(gd->gdma_context, apc); ++ return err; ++} ++ ++static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) ++{ ++ u32 used_space_old; ++ u32 used_space_new; ++ ++ used_space_old = wq->head - wq->tail; ++ used_space_new = wq->head - (wq->tail + num_units); ++ ++ if (WARN_ON_ONCE(used_space_new > used_space_old)) ++ return -ERANGE; ++ ++ wq->tail += num_units; ++ return 0; ++} ++ ++static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) ++{ ++ struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; ++ struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; ++ struct device *dev = gc->dev; ++ int i; ++ ++ dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); ++ ++ for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) ++ dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], ++ DMA_TO_DEVICE); ++} ++ ++static void mana_poll_tx_cq(struct mana_cq *cq) ++{ ++ struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent; ++ struct gdma_comp *completions = cq->gdma_comp_buf; ++ struct gdma_posted_wqe_info *wqe_info; ++ unsigned int 
pkt_transmitted = 0; ++ unsigned int wqe_unit_cnt = 0; ++ struct mana_txq *txq = cq->txq; ++ struct mana_port_context *apc; ++ struct netdev_queue *net_txq; ++ struct gdma_queue *gdma_wq; ++ unsigned int avail_space; ++ struct net_device *ndev; ++ struct sk_buff *skb; ++ bool txq_stopped; ++ int comp_read; ++ int i; ++ ++ ndev = txq->ndev; ++ apc = netdev_priv(ndev); ++ ++ comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, ++ CQE_POLLING_BUFFER); ++ ++ for (i = 0; i < comp_read; i++) { ++ struct mana_tx_comp_oob *cqe_oob; ++ ++ if (WARN_ON_ONCE(!completions[i].is_sq)) ++ return; ++ ++ cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; ++ if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != ++ MANA_CQE_COMPLETION)) ++ return; ++ ++ switch (cqe_oob->cqe_hdr.cqe_type) { ++ case CQE_TX_OKAY: ++ break; ++ ++ case CQE_TX_SA_DROP: ++ case CQE_TX_MTU_DROP: ++ case CQE_TX_INVALID_OOB: ++ case CQE_TX_INVALID_ETH_TYPE: ++ case CQE_TX_HDR_PROCESSING_ERROR: ++ case CQE_TX_VF_DISABLED: ++ case CQE_TX_VPORT_IDX_OUT_OF_RANGE: ++ case CQE_TX_VPORT_DISABLED: ++ case CQE_TX_VLAN_TAGGING_VIOLATION: ++ WARN_ONCE(1, "TX: CQE error %d: ignored.\n", ++ cqe_oob->cqe_hdr.cqe_type); ++ break; ++ ++ default: ++ /* If the CQE type is unexpected, log an error, assert, ++ * and go through the error path. ++ */ ++ WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", ++ cqe_oob->cqe_hdr.cqe_type); ++ return; ++ } ++ ++ if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) ++ return; ++ ++ skb = skb_dequeue(&txq->pending_skbs); ++ if (WARN_ON_ONCE(!skb)) ++ return; ++ ++ wqe_info = (struct gdma_posted_wqe_info *)skb->cb; ++ wqe_unit_cnt += wqe_info->wqe_size_in_bu; ++ ++ mana_unmap_skb(skb, apc); ++ ++ napi_consume_skb(skb, gdma_eq->eq.budget); ++ ++ pkt_transmitted++; ++ } ++ ++ if (WARN_ON_ONCE(wqe_unit_cnt == 0)) ++ return; ++ ++ mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); ++ ++ gdma_wq = txq->gdma_sq; ++ avail_space = mana_gd_wq_avail_space(gdma_wq); ++ ++ /* Ensure tail updated before checking q stop */ ++ smp_mb(); ++ ++ net_txq = txq->net_txq; ++ txq_stopped = netif_tx_queue_stopped(net_txq); ++ ++ /* Ensure checking txq_stopped before apc->port_is_up. 
*/ ++ smp_rmb(); ++ ++ if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { ++ netif_tx_wake_queue(net_txq); ++ apc->eth_stats.wake_queue++; ++ } ++ ++ if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) ++ WARN_ON_ONCE(1); ++} ++ ++static void mana_post_pkt_rxq(struct mana_rxq *rxq) ++{ ++ struct mana_recv_buf_oob *recv_buf_oob; ++ u32 curr_index; ++ int err; ++ ++ curr_index = rxq->buf_index++; ++ if (rxq->buf_index == rxq->num_rx_buf) ++ rxq->buf_index = 0; ++ ++ recv_buf_oob = &rxq->rx_oobs[curr_index]; ++ ++ err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req, ++ &recv_buf_oob->wqe_inf); ++ if (WARN_ON_ONCE(err)) ++ return; ++ ++ WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); ++} ++ ++static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, ++ struct mana_rxq *rxq) ++{ ++ struct mana_stats *rx_stats = &rxq->stats; ++ struct net_device *ndev = rxq->ndev; ++ uint pkt_len = cqe->ppi[0].pkt_len; ++ struct mana_port_context *apc; ++ u16 rxq_idx = rxq->rxq_idx; ++ struct napi_struct *napi; ++ struct gdma_queue *eq; ++ struct sk_buff *skb; ++ u32 hash_value; ++ ++ apc = netdev_priv(ndev); ++ eq = apc->eqs[rxq_idx].eq; ++ eq->eq.work_done++; ++ napi = &eq->eq.napi; ++ ++ if (!buf_va) { ++ ++ndev->stats.rx_dropped; ++ return; ++ } ++ ++ skb = build_skb(buf_va, PAGE_SIZE); ++ ++ if (!skb) { ++ free_page((unsigned long)buf_va); ++ ++ndev->stats.rx_dropped; ++ return; ++ } ++ ++ skb_put(skb, pkt_len); ++ skb->dev = napi->dev; ++ ++ skb->protocol = eth_type_trans(skb, ndev); ++ skb_checksum_none_assert(skb); ++ skb_record_rx_queue(skb, rxq_idx); ++ ++ if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { ++ if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ } ++ ++ if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { ++ hash_value = cqe->ppi[0].pkt_hash; ++ ++ if (cqe->rx_hashtype & MANA_HASH_L4) ++ skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); ++ else ++ skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); ++ } ++ ++ napi_gro_receive(napi, skb); ++ ++ u64_stats_update_begin(&rx_stats->syncp); ++ rx_stats->packets++; ++ rx_stats->bytes += pkt_len; ++ u64_stats_update_end(&rx_stats->syncp); ++} ++ ++static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, ++ struct gdma_comp *cqe) ++{ ++ struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; ++ struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; ++ struct net_device *ndev = rxq->ndev; ++ struct mana_recv_buf_oob *rxbuf_oob; ++ struct device *dev = gc->dev; ++ void *new_buf, *old_buf; ++ struct page *new_page; ++ u32 curr, pktlen; ++ dma_addr_t da; ++ ++ switch (oob->cqe_hdr.cqe_type) { ++ case CQE_RX_OKAY: ++ break; ++ ++ case CQE_RX_TRUNCATED: ++ netdev_err(ndev, "Dropped a truncated packet\n"); ++ return; ++ ++ case CQE_RX_COALESCED_4: ++ netdev_err(ndev, "RX coalescing is unsupported\n"); ++ return; ++ ++ case CQE_RX_OBJECT_FENCE: ++ netdev_err(ndev, "RX Fencing is unsupported\n"); ++ return; ++ ++ default: ++ netdev_err(ndev, "Unknown RX CQE type = %d\n", ++ oob->cqe_hdr.cqe_type); ++ return; ++ } ++ ++ if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY) ++ return; ++ ++ pktlen = oob->ppi[0].pkt_len; ++ ++ if (pktlen == 0) { ++ /* data packets should never have packetlength of zero */ ++ netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", ++ rxq->gdma_id, cq->gdma_id, rxq->rxobj); ++ return; ++ } ++ ++ curr = rxq->buf_index; ++ rxbuf_oob = 
&rxq->rx_oobs[curr]; ++ WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); ++ ++ new_page = alloc_page(GFP_ATOMIC); ++ ++ if (new_page) { ++ da = dma_map_page(dev, new_page, 0, rxq->datasize, ++ DMA_FROM_DEVICE); ++ ++ if (dma_mapping_error(dev, da)) { ++ __free_page(new_page); ++ new_page = NULL; ++ } ++ } ++ ++ new_buf = new_page ? page_to_virt(new_page) : NULL; ++ ++ if (new_buf) { ++ dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize, ++ DMA_FROM_DEVICE); ++ ++ old_buf = rxbuf_oob->buf_va; ++ ++ /* refresh the rxbuf_oob with the new page */ ++ rxbuf_oob->buf_va = new_buf; ++ rxbuf_oob->buf_dma_addr = da; ++ rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr; ++ } else { ++ old_buf = NULL; /* drop the packet if no memory */ ++ } ++ ++ mana_rx_skb(old_buf, oob, rxq); ++ ++ mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); ++ ++ mana_post_pkt_rxq(rxq); ++} ++ ++static void mana_poll_rx_cq(struct mana_cq *cq) ++{ ++ struct gdma_comp *comp = cq->gdma_comp_buf; ++ u32 comp_read, i; ++ ++ comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); ++ WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); ++ ++ for (i = 0; i < comp_read; i++) { ++ if (WARN_ON_ONCE(comp[i].is_sq)) ++ return; ++ ++ /* verify recv cqe references the right rxq */ ++ if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) ++ return; ++ ++ mana_process_rx_cqe(cq->rxq, cq, &comp[i]); ++ } ++} ++ ++static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) ++{ ++ struct mana_cq *cq = context; ++ ++ WARN_ON_ONCE(cq->gdma_cq != gdma_queue); ++ ++ if (cq->type == MANA_CQ_TYPE_RX) ++ mana_poll_rx_cq(cq); ++ else ++ mana_poll_tx_cq(cq); ++ ++ mana_gd_arm_cq(gdma_queue); ++} ++ ++static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) ++{ ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ ++ if (!cq->gdma_cq) ++ return; ++ ++ mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); ++} ++ ++static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) ++{ ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ ++ if (!txq->gdma_sq) ++ return; ++ ++ mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); ++} ++ ++static void mana_destroy_txq(struct mana_port_context *apc) ++{ ++ int i; ++ ++ if (!apc->tx_qp) ++ return; ++ ++ for (i = 0; i < apc->num_queues; i++) { ++ mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); ++ ++ mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); ++ ++ mana_deinit_txq(apc, &apc->tx_qp[i].txq); ++ } ++ ++ kfree(apc->tx_qp); ++ apc->tx_qp = NULL; ++} ++ ++static int mana_create_txq(struct mana_port_context *apc, ++ struct net_device *net) ++{ ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ struct mana_obj_spec wq_spec; ++ struct mana_obj_spec cq_spec; ++ struct gdma_queue_spec spec; ++ struct gdma_context *gc; ++ struct mana_txq *txq; ++ struct mana_cq *cq; ++ u32 txq_size; ++ u32 cq_size; ++ int err; ++ int i; ++ ++ apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp), ++ GFP_KERNEL); ++ if (!apc->tx_qp) ++ return -ENOMEM; ++ ++ /* The minimum size of the WQE is 32 bytes, hence ++ * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs ++ * the SQ can store. This value is then used to size other queues ++ * to prevent overflow. 
++ */ ++ txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; ++ BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); ++ ++ cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; ++ cq_size = PAGE_ALIGN(cq_size); ++ ++ gc = gd->gdma_context; ++ ++ for (i = 0; i < apc->num_queues; i++) { ++ apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; ++ ++ /* Create SQ */ ++ txq = &apc->tx_qp[i].txq; ++ ++ u64_stats_init(&txq->stats.syncp); ++ txq->ndev = net; ++ txq->net_txq = netdev_get_tx_queue(net, i); ++ txq->vp_offset = apc->tx_vp_offset; ++ skb_queue_head_init(&txq->pending_skbs); ++ ++ memset(&spec, 0, sizeof(spec)); ++ spec.type = GDMA_SQ; ++ spec.monitor_avl_buf = true; ++ spec.queue_size = txq_size; ++ err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); ++ if (err) ++ goto out; ++ ++ /* Create SQ's CQ */ ++ cq = &apc->tx_qp[i].tx_cq; ++ cq->gdma_comp_buf = apc->eqs[i].cqe_poll; ++ cq->type = MANA_CQ_TYPE_TX; ++ ++ cq->txq = txq; ++ ++ memset(&spec, 0, sizeof(spec)); ++ spec.type = GDMA_CQ; ++ spec.monitor_avl_buf = false; ++ spec.queue_size = cq_size; ++ spec.cq.callback = mana_cq_handler; ++ spec.cq.parent_eq = apc->eqs[i].eq; ++ spec.cq.context = cq; ++ err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); ++ if (err) ++ goto out; ++ ++ memset(&wq_spec, 0, sizeof(wq_spec)); ++ memset(&cq_spec, 0, sizeof(cq_spec)); ++ ++ wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region; ++ wq_spec.queue_size = txq->gdma_sq->queue_size; ++ ++ cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; ++ cq_spec.queue_size = cq->gdma_cq->queue_size; ++ cq_spec.modr_ctx_id = 0; ++ cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; ++ ++ err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, ++ &wq_spec, &cq_spec, ++ &apc->tx_qp[i].tx_object); ++ ++ if (err) ++ goto out; ++ ++ txq->gdma_sq->id = wq_spec.queue_index; ++ cq->gdma_cq->id = cq_spec.queue_index; ++ ++ txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ ++ txq->gdma_txq_id = txq->gdma_sq->id; ++ ++ cq->gdma_id = cq->gdma_cq->id; ++ ++ if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) ++ return -EINVAL; ++ ++ gc->cq_table[cq->gdma_id] = cq->gdma_cq; ++ ++ mana_gd_arm_cq(cq->gdma_cq); ++ } ++ ++ return 0; ++out: ++ mana_destroy_txq(apc); ++ return err; ++} ++ ++static void mana_napi_sync_for_rx(struct mana_rxq *rxq) ++{ ++ struct net_device *ndev = rxq->ndev; ++ struct mana_port_context *apc; ++ u16 rxq_idx = rxq->rxq_idx; ++ struct napi_struct *napi; ++ struct gdma_queue *eq; ++ ++ apc = netdev_priv(ndev); ++ eq = apc->eqs[rxq_idx].eq; ++ napi = &eq->eq.napi; ++ ++ napi_synchronize(napi); ++} ++ ++static void mana_destroy_rxq(struct mana_port_context *apc, ++ struct mana_rxq *rxq, bool validate_state) ++ ++{ ++ struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; ++ struct mana_recv_buf_oob *rx_oob; ++ struct device *dev = gc->dev; ++ int i; ++ ++ if (!rxq) ++ return; ++ ++ if (validate_state) ++ mana_napi_sync_for_rx(rxq); ++ ++ mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); ++ ++ mana_deinit_cq(apc, &rxq->rx_cq); ++ ++ for (i = 0; i < rxq->num_rx_buf; i++) { ++ rx_oob = &rxq->rx_oobs[i]; ++ ++ if (!rx_oob->buf_va) ++ continue; ++ ++ dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize, ++ DMA_FROM_DEVICE); ++ ++ free_page((unsigned long)rx_oob->buf_va); ++ rx_oob->buf_va = NULL; ++ } ++ ++ if (rxq->gdma_rq) ++ mana_gd_destroy_queue(gc, rxq->gdma_rq); ++ ++ kfree(rxq); ++} ++ ++#define MANA_WQE_HEADER_SIZE 16 ++#define MANA_WQE_SGE_SIZE 16 ++ ++static int 
mana_alloc_rx_wqe(struct mana_port_context *apc, ++ struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) ++{ ++ struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; ++ struct mana_recv_buf_oob *rx_oob; ++ struct device *dev = gc->dev; ++ struct page *page; ++ dma_addr_t da; ++ u32 buf_idx; ++ ++ WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE); ++ ++ *rxq_size = 0; ++ *cq_size = 0; ++ ++ for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { ++ rx_oob = &rxq->rx_oobs[buf_idx]; ++ memset(rx_oob, 0, sizeof(*rx_oob)); ++ ++ page = alloc_page(GFP_KERNEL); ++ if (!page) ++ return -ENOMEM; ++ ++ da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE); ++ ++ if (dma_mapping_error(dev, da)) { ++ __free_page(page); ++ return -ENOMEM; ++ } ++ ++ rx_oob->buf_va = page_to_virt(page); ++ rx_oob->buf_dma_addr = da; ++ ++ rx_oob->num_sge = 1; ++ rx_oob->sgl[0].address = rx_oob->buf_dma_addr; ++ rx_oob->sgl[0].size = rxq->datasize; ++ rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey; ++ ++ rx_oob->wqe_req.sgl = rx_oob->sgl; ++ rx_oob->wqe_req.num_sge = rx_oob->num_sge; ++ rx_oob->wqe_req.inline_oob_size = 0; ++ rx_oob->wqe_req.inline_oob_data = NULL; ++ rx_oob->wqe_req.flags = 0; ++ rx_oob->wqe_req.client_data_unit = 0; ++ ++ *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + ++ MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); ++ *cq_size += COMP_ENTRY_SIZE; ++ } ++ ++ return 0; ++} ++ ++static int mana_push_wqe(struct mana_rxq *rxq) ++{ ++ struct mana_recv_buf_oob *rx_oob; ++ u32 buf_idx; ++ int err; ++ ++ for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { ++ rx_oob = &rxq->rx_oobs[buf_idx]; ++ ++ err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, ++ &rx_oob->wqe_inf); ++ if (err) ++ return -ENOSPC; ++ } ++ ++ return 0; ++} ++ ++static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, ++ u32 rxq_idx, struct mana_eq *eq, ++ struct net_device *ndev) ++{ ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ struct mana_obj_spec wq_spec; ++ struct mana_obj_spec cq_spec; ++ struct gdma_queue_spec spec; ++ struct mana_cq *cq = NULL; ++ struct gdma_context *gc; ++ u32 cq_size, rq_size; ++ struct mana_rxq *rxq; ++ int err; ++ ++ gc = gd->gdma_context; ++ ++ rxq = kzalloc(sizeof(*rxq) + ++ RX_BUFFERS_PER_QUEUE * sizeof(struct mana_recv_buf_oob), ++ GFP_KERNEL); ++ if (!rxq) ++ return NULL; ++ ++ rxq->ndev = ndev; ++ rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; ++ rxq->rxq_idx = rxq_idx; ++ rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64); ++ rxq->rxobj = INVALID_MANA_HANDLE; ++ ++ err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); ++ if (err) ++ goto out; ++ ++ rq_size = PAGE_ALIGN(rq_size); ++ cq_size = PAGE_ALIGN(cq_size); ++ ++ /* Create RQ */ ++ memset(&spec, 0, sizeof(spec)); ++ spec.type = GDMA_RQ; ++ spec.monitor_avl_buf = true; ++ spec.queue_size = rq_size; ++ err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); ++ if (err) ++ goto out; ++ ++ /* Create RQ's CQ */ ++ cq = &rxq->rx_cq; ++ cq->gdma_comp_buf = eq->cqe_poll; ++ cq->type = MANA_CQ_TYPE_RX; ++ cq->rxq = rxq; ++ ++ memset(&spec, 0, sizeof(spec)); ++ spec.type = GDMA_CQ; ++ spec.monitor_avl_buf = false; ++ spec.queue_size = cq_size; ++ spec.cq.callback = mana_cq_handler; ++ spec.cq.parent_eq = eq->eq; ++ spec.cq.context = cq; ++ err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); ++ if (err) ++ goto out; ++ ++ memset(&wq_spec, 0, sizeof(wq_spec)); ++ memset(&cq_spec, 0, sizeof(cq_spec)); ++ wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region; ++ wq_spec.queue_size = rxq->gdma_rq->queue_size; ++ ++ 
cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; ++ cq_spec.queue_size = cq->gdma_cq->queue_size; ++ cq_spec.modr_ctx_id = 0; ++ cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; ++ ++ err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, ++ &wq_spec, &cq_spec, &rxq->rxobj); ++ if (err) ++ goto out; ++ ++ rxq->gdma_rq->id = wq_spec.queue_index; ++ cq->gdma_cq->id = cq_spec.queue_index; ++ ++ rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; ++ ++ rxq->gdma_id = rxq->gdma_rq->id; ++ cq->gdma_id = cq->gdma_cq->id; ++ ++ err = mana_push_wqe(rxq); ++ if (err) ++ goto out; ++ ++ if (cq->gdma_id >= gc->max_num_cqs) ++ goto out; ++ ++ gc->cq_table[cq->gdma_id] = cq->gdma_cq; ++ ++ mana_gd_arm_cq(cq->gdma_cq); ++out: ++ if (!err) ++ return rxq; ++ ++ netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); ++ ++ mana_destroy_rxq(apc, rxq, false); ++ ++ if (cq) ++ mana_deinit_cq(apc, cq); ++ ++ return NULL; ++} ++ ++static int mana_add_rx_queues(struct mana_port_context *apc, ++ struct net_device *ndev) ++{ ++ struct mana_rxq *rxq; ++ int err = 0; ++ int i; ++ ++ for (i = 0; i < apc->num_queues; i++) { ++ rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev); ++ if (!rxq) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ u64_stats_init(&rxq->stats.syncp); ++ ++ apc->rxqs[i] = rxq; ++ } ++ ++ apc->default_rxobj = apc->rxqs[0]->rxobj; ++out: ++ return err; ++} ++ ++static void mana_destroy_vport(struct mana_port_context *apc) ++{ ++ struct mana_rxq *rxq; ++ u32 rxq_idx; ++ ++ for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { ++ rxq = apc->rxqs[rxq_idx]; ++ if (!rxq) ++ continue; ++ ++ mana_destroy_rxq(apc, rxq, true); ++ apc->rxqs[rxq_idx] = NULL; ++ } ++ ++ mana_destroy_txq(apc); ++} ++ ++static int mana_create_vport(struct mana_port_context *apc, ++ struct net_device *net) ++{ ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ int err; ++ ++ apc->default_rxobj = INVALID_MANA_HANDLE; ++ ++ err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); ++ if (err) ++ return err; ++ ++ return mana_create_txq(apc, net); ++} ++ ++static void mana_rss_table_init(struct mana_port_context *apc) ++{ ++ int i; ++ ++ for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) ++ apc->indir_table[i] = ++ ethtool_rxfh_indir_default(i, apc->num_queues); ++} ++ ++int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, ++ bool update_hash, bool update_tab) ++{ ++ u32 queue_idx; ++ int i; ++ ++ if (update_tab) { ++ for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { ++ queue_idx = apc->indir_table[i]; ++ apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; ++ } ++ } ++ ++ return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); ++} ++ ++static int mana_init_port(struct net_device *ndev) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ u32 max_txq, max_rxq, max_queues; ++ int port_idx = apc->port_idx; ++ u32 num_indirect_entries; ++ int err; ++ ++ err = mana_init_port_context(apc); ++ if (err) ++ return err; ++ ++ err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, ++ &num_indirect_entries); ++ if (err) { ++ netdev_err(ndev, "Failed to query info for vPort 0\n"); ++ goto reset_apc; ++ } ++ ++ max_queues = min_t(u32, max_txq, max_rxq); ++ if (apc->max_queues > max_queues) ++ apc->max_queues = max_queues; ++ ++ if (apc->num_queues > apc->max_queues) ++ apc->num_queues = apc->max_queues; ++ ++ ether_addr_copy(ndev->dev_addr, apc->mac_addr); ++ ++ return 0; ++ ++reset_apc: ++ kfree(apc->rxqs); ++ apc->rxqs = NULL; ++ 
return err; ++} ++ ++int mana_alloc_queues(struct net_device *ndev) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ struct gdma_dev *gd = apc->ac->gdma_dev; ++ int err; ++ ++ err = mana_create_eq(apc); ++ if (err) ++ return err; ++ ++ err = mana_create_vport(apc, ndev); ++ if (err) ++ goto destroy_eq; ++ ++ err = netif_set_real_num_tx_queues(ndev, apc->num_queues); ++ if (err) ++ goto destroy_vport; ++ ++ err = mana_add_rx_queues(apc, ndev); ++ if (err) ++ goto destroy_vport; ++ ++ apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; ++ ++ err = netif_set_real_num_rx_queues(ndev, apc->num_queues); ++ if (err) ++ goto destroy_vport; ++ ++ mana_rss_table_init(apc); ++ ++ err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); ++ if (err) ++ goto destroy_vport; ++ ++ return 0; ++ ++destroy_vport: ++ mana_destroy_vport(apc); ++destroy_eq: ++ mana_destroy_eq(gd->gdma_context, apc); ++ return err; ++} ++ ++int mana_attach(struct net_device *ndev) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ int err; ++ ++ ASSERT_RTNL(); ++ ++ err = mana_init_port(ndev); ++ if (err) ++ return err; ++ ++ err = mana_alloc_queues(ndev); ++ if (err) { ++ kfree(apc->rxqs); ++ apc->rxqs = NULL; ++ return err; ++ } ++ ++ netif_device_attach(ndev); ++ ++ apc->port_is_up = apc->port_st_save; ++ ++ /* Ensure port state updated before txq state */ ++ smp_wmb(); ++ ++ if (apc->port_is_up) { ++ netif_carrier_on(ndev); ++ netif_tx_wake_all_queues(ndev); ++ } ++ ++ return 0; ++} ++ ++static int mana_dealloc_queues(struct net_device *ndev) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ struct mana_txq *txq; ++ int i, err; ++ ++ if (apc->port_is_up) ++ return -EINVAL; ++ ++ /* No packet can be transmitted now since apc->port_is_up is false. ++ * There is still a tiny chance that mana_poll_tx_cq() can re-enable ++ * a txq because it may not timely see apc->port_is_up being cleared ++ * to false, but it doesn't matter since mana_start_xmit() drops any ++ * new packets due to apc->port_is_up being false. ++ * ++ * Drain all the in-flight TX packets ++ */ ++ for (i = 0; i < apc->num_queues; i++) { ++ txq = &apc->tx_qp[i].txq; ++ ++ while (atomic_read(&txq->pending_sends) > 0) ++ usleep_range(1000, 2000); ++ } ++ ++ /* We're 100% sure the queues can no longer be woken up, because ++ * we're sure now mana_poll_tx_cq() can't be running. 
++ */ ++ ++ apc->rss_state = TRI_STATE_FALSE; ++ err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); ++ if (err) { ++ netdev_err(ndev, "Failed to disable vPort: %d\n", err); ++ return err; ++ } ++ ++ /* TODO: Implement RX fencing */ ++ ssleep(1); ++ ++ mana_destroy_vport(apc); ++ ++ mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc); ++ ++ return 0; ++} ++ ++int mana_detach(struct net_device *ndev, bool from_close) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ int err; ++ ++ ASSERT_RTNL(); ++ ++ apc->port_st_save = apc->port_is_up; ++ apc->port_is_up = false; ++ ++ /* Ensure port state updated before txq state */ ++ smp_wmb(); ++ ++ netif_tx_disable(ndev); ++ netif_carrier_off(ndev); ++ ++ if (apc->port_st_save) { ++ err = mana_dealloc_queues(ndev); ++ if (err) ++ return err; ++ } ++ ++ if (!from_close) { ++ netif_device_detach(ndev); ++ mana_cleanup_port_context(apc); ++ } ++ ++ return 0; ++} ++ ++static int mana_probe_port(struct mana_context *ac, int port_idx, ++ struct net_device **ndev_storage) ++{ ++ struct gdma_context *gc = ac->gdma_dev->gdma_context; ++ struct mana_port_context *apc; ++ struct net_device *ndev; ++ int err; ++ ++ ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), ++ gc->max_num_queues); ++ if (!ndev) ++ return -ENOMEM; ++ ++ *ndev_storage = ndev; ++ ++ apc = netdev_priv(ndev); ++ apc->ac = ac; ++ apc->ndev = ndev; ++ apc->max_queues = gc->max_num_queues; ++ apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES); ++ apc->port_handle = INVALID_MANA_HANDLE; ++ apc->port_idx = port_idx; ++ ++ ndev->netdev_ops = &mana_devops; ++ ndev->ethtool_ops = &mana_ethtool_ops; ++ ndev->mtu = ETH_DATA_LEN; ++ ndev->max_mtu = ndev->mtu; ++ ndev->min_mtu = ndev->mtu; ++ ndev->needed_headroom = MANA_HEADROOM; ++ SET_NETDEV_DEV(ndev, gc->dev); ++ ++ netif_carrier_off(ndev); ++ ++ netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); ++ ++ err = mana_init_port(ndev); ++ if (err) ++ goto free_net; ++ ++ netdev_lockdep_set_classes(ndev); ++ ++ ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; ++ ndev->hw_features |= NETIF_F_RXCSUM; ++ ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; ++ ndev->hw_features |= NETIF_F_RXHASH; ++ ndev->features = ndev->hw_features; ++ ndev->vlan_features = 0; ++ ++ err = register_netdev(ndev); ++ if (err) { ++ netdev_err(ndev, "Unable to register netdev.\n"); ++ goto reset_apc; ++ } ++ ++ return 0; ++ ++reset_apc: ++ kfree(apc->rxqs); ++ apc->rxqs = NULL; ++free_net: ++ *ndev_storage = NULL; ++ netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); ++ free_netdev(ndev); ++ return err; ++} ++ ++int mana_probe(struct gdma_dev *gd) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct device *dev = gc->dev; ++ struct mana_context *ac; ++ int err; ++ int i; ++ ++ dev_info(dev, ++ "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", ++ MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); ++ ++ err = mana_gd_register_device(gd); ++ if (err) ++ return err; ++ ++ ac = kzalloc(sizeof(*ac), GFP_KERNEL); ++ if (!ac) ++ return -ENOMEM; ++ ++ ac->gdma_dev = gd; ++ ac->num_ports = 1; ++ gd->driver_data = ac; ++ ++ err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, ++ MANA_MICRO_VERSION, &ac->num_ports); ++ if (err) ++ goto out; ++ ++ if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) ++ ac->num_ports = MAX_PORTS_IN_MANA_DEV; ++ ++ for (i = 0; i < ac->num_ports; i++) { ++ err = mana_probe_port(ac, i, &ac->ports[i]); ++ if (err) ++ break; ++ } ++out: ++ if 
(err) ++ mana_remove(gd); ++ ++ return err; ++} ++ ++void mana_remove(struct gdma_dev *gd) ++{ ++ struct gdma_context *gc = gd->gdma_context; ++ struct mana_context *ac = gd->driver_data; ++ struct device *dev = gc->dev; ++ struct net_device *ndev; ++ int i; ++ ++ for (i = 0; i < ac->num_ports; i++) { ++ ndev = ac->ports[i]; ++ if (!ndev) { ++ if (i == 0) ++ dev_err(dev, "No net device to remove\n"); ++ goto out; ++ } ++ ++ /* All cleanup actions should stay after rtnl_lock(), otherwise ++ * other functions may access partially cleaned up data. ++ */ ++ rtnl_lock(); ++ ++ mana_detach(ndev, false); ++ ++ unregister_netdevice(ndev); ++ ++ rtnl_unlock(); ++ ++ free_netdev(ndev); ++ } ++out: ++ mana_gd_deregister_device(gd); ++ gd->driver_data = NULL; ++ gd->gdma_context = NULL; ++ kfree(ac); ++} +diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +new file mode 100644 +index 000000000000..7e74339f39ae +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +@@ -0,0 +1,250 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++/* Copyright (c) 2021, Microsoft Corporation. */ ++ ++#include ++#include ++#include ++ ++#include "mana.h" ++ ++static const struct { ++ char name[ETH_GSTRING_LEN]; ++ u16 offset; ++} mana_eth_stats[] = { ++ {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, ++ {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, ++}; ++ ++static int mana_get_sset_count(struct net_device *ndev, int stringset) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ unsigned int num_queues = apc->num_queues; ++ ++ if (stringset != ETH_SS_STATS) ++ return -EINVAL; ++ ++ return ARRAY_SIZE(mana_eth_stats) + num_queues * 4; ++} ++ ++static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ unsigned int num_queues = apc->num_queues; ++ u8 *p = data; ++ int i; ++ ++ if (stringset != ETH_SS_STATS) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) { ++ memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN); ++ p += ETH_GSTRING_LEN; ++ } ++ ++ for (i = 0; i < num_queues; i++) { ++ sprintf(p, "rx_%d_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "rx_%d_bytes", i); ++ p += ETH_GSTRING_LEN; ++ } ++ ++ for (i = 0; i < num_queues; i++) { ++ sprintf(p, "tx_%d_packets", i); ++ p += ETH_GSTRING_LEN; ++ sprintf(p, "tx_%d_bytes", i); ++ p += ETH_GSTRING_LEN; ++ } ++} ++ ++static void mana_get_ethtool_stats(struct net_device *ndev, ++ struct ethtool_stats *e_stats, u64 *data) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ unsigned int num_queues = apc->num_queues; ++ void *eth_stats = &apc->eth_stats; ++ struct mana_stats *stats; ++ unsigned int start; ++ u64 packets, bytes; ++ int q, i = 0; ++ ++ if (!apc->port_is_up) ++ return; ++ ++ for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) ++ data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); ++ ++ for (q = 0; q < num_queues; q++) { ++ stats = &apc->rxqs[q]->stats; ++ ++ do { ++ start = u64_stats_fetch_begin_irq(&stats->syncp); ++ packets = stats->packets; ++ bytes = stats->bytes; ++ } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ ++ data[i++] = packets; ++ data[i++] = bytes; ++ } ++ ++ for (q = 0; q < num_queues; q++) { ++ stats = &apc->tx_qp[q].txq.stats; ++ ++ do { ++ start = u64_stats_fetch_begin_irq(&stats->syncp); ++ packets = stats->packets; ++ bytes = stats->bytes; ++ } while 
(u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ ++ data[i++] = packets; ++ data[i++] = bytes; ++ } ++} ++ ++static int mana_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *cmd, ++ u32 *rules) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ ++ switch (cmd->cmd) { ++ case ETHTOOL_GRXRINGS: ++ cmd->data = apc->num_queues; ++ return 0; ++ } ++ ++ return -EOPNOTSUPP; ++} ++ ++static u32 mana_get_rxfh_key_size(struct net_device *ndev) ++{ ++ return MANA_HASH_KEY_SIZE; ++} ++ ++static u32 mana_rss_indir_size(struct net_device *ndev) ++{ ++ return MANA_INDIRECT_TABLE_SIZE; ++} ++ ++static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key, ++ u8 *hfunc) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ int i; ++ ++ if (hfunc) ++ *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ ++ ++ if (indir) { ++ for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) ++ indir[i] = apc->indir_table[i]; ++ } ++ ++ if (key) ++ memcpy(key, apc->hashkey, MANA_HASH_KEY_SIZE); ++ ++ return 0; ++} ++ ++static int mana_set_rxfh(struct net_device *ndev, const u32 *indir, ++ const u8 *key, const u8 hfunc) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ bool update_hash = false, update_table = false; ++ u32 save_table[MANA_INDIRECT_TABLE_SIZE]; ++ u8 save_key[MANA_HASH_KEY_SIZE]; ++ int i, err; ++ ++ if (!apc->port_is_up) ++ return -EOPNOTSUPP; ++ ++ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) ++ return -EOPNOTSUPP; ++ ++ if (indir) { ++ for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) ++ if (indir[i] >= apc->num_queues) ++ return -EINVAL; ++ ++ update_table = true; ++ for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { ++ save_table[i] = apc->indir_table[i]; ++ apc->indir_table[i] = indir[i]; ++ } ++ } ++ ++ if (key) { ++ update_hash = true; ++ memcpy(save_key, apc->hashkey, MANA_HASH_KEY_SIZE); ++ memcpy(apc->hashkey, key, MANA_HASH_KEY_SIZE); ++ } ++ ++ err = mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); ++ ++ if (err) { /* recover to original values */ ++ if (update_table) { ++ for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) ++ apc->indir_table[i] = save_table[i]; ++ } ++ ++ if (update_hash) ++ memcpy(apc->hashkey, save_key, MANA_HASH_KEY_SIZE); ++ ++ mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); ++ } ++ ++ return err; ++} ++ ++static void mana_get_channels(struct net_device *ndev, ++ struct ethtool_channels *channel) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ ++ channel->max_combined = apc->max_queues; ++ channel->combined_count = apc->num_queues; ++} ++ ++static int mana_set_channels(struct net_device *ndev, ++ struct ethtool_channels *channels) ++{ ++ struct mana_port_context *apc = netdev_priv(ndev); ++ unsigned int new_count = channels->combined_count; ++ unsigned int old_count = apc->num_queues; ++ int err, err2; ++ ++ if (!apc->port_is_up) ++ return -EOPNOTSUPP; ++ ++ err = mana_detach(ndev, false); ++ if (err) { ++ netdev_err(ndev, "mana_detach failed: %d\n", err); ++ return err; ++ } ++ ++ apc->num_queues = new_count; ++ err = mana_attach(ndev); ++ if (!err) ++ return 0; ++ ++ netdev_err(ndev, "mana_attach failed: %d\n", err); ++ ++ /* Try to roll it back to the old configuration. 
*/ ++ apc->num_queues = old_count; ++ err2 = mana_attach(ndev); ++ if (err2) ++ netdev_err(ndev, "mana re-attach failed: %d\n", err2); ++ ++ return err; ++} ++ ++const struct ethtool_ops mana_ethtool_ops = { ++ .get_ethtool_stats = mana_get_ethtool_stats, ++ .get_sset_count = mana_get_sset_count, ++ .get_strings = mana_get_strings, ++ .get_rxnfc = mana_get_rxnfc, ++ .get_rxfh_key_size = mana_get_rxfh_key_size, ++ .get_rxfh_indir_size = mana_rss_indir_size, ++ .get_rxfh = mana_get_rxfh, ++ .set_rxfh = mana_set_rxfh, ++ .get_channels = mana_get_channels, ++ .set_channels = mana_set_channels, ++}; +diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c +new file mode 100644 +index 000000000000..da255da62176 +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c +@@ -0,0 +1,291 @@ ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause ++/* Copyright (c) 2021, Microsoft Corporation. */ ++ ++#include ++#include ++#include ++#include ++ ++#include "shm_channel.h" ++ ++#define PAGE_FRAME_L48_WIDTH_BYTES 6 ++#define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8) ++#define PAGE_FRAME_L48_MASK 0x0000FFFFFFFFFFFF ++#define PAGE_FRAME_H4_WIDTH_BITS 4 ++#define VECTOR_MASK 0xFFFF ++#define SHMEM_VF_RESET_STATE ((u32)-1) ++ ++#define SMC_MSG_TYPE_ESTABLISH_HWC 1 ++#define SMC_MSG_TYPE_ESTABLISH_HWC_VERSION 0 ++ ++#define SMC_MSG_TYPE_DESTROY_HWC 2 ++#define SMC_MSG_TYPE_DESTROY_HWC_VERSION 0 ++ ++#define SMC_MSG_DIRECTION_REQUEST 0 ++#define SMC_MSG_DIRECTION_RESPONSE 1 ++ ++/* Structures labeled with "HW DATA" are exchanged with the hardware. All of ++ * them are naturally aligned and hence don't need __packed. ++ */ ++ ++/* Shared memory channel protocol header ++ * ++ * msg_type: set on request and response; response matches request. ++ * msg_version: newer PF writes back older response (matching request) ++ * older PF acts on latest version known and sets that version in result ++ * (less than request). ++ * direction: 0 for request, VF->PF; 1 for response, PF->VF. ++ * status: 0 on request, ++ * operation result on response (success = 0, failure = 1 or greater). ++ * reset_vf: If set on either establish or destroy request, indicates perform ++ * FLR before/after the operation. ++ * owner_is_pf: 1 indicates PF owned, 0 indicates VF owned. ++ */ ++union smc_proto_hdr { ++ u32 as_uint32; ++ ++ struct { ++ u8 msg_type : 3; ++ u8 msg_version : 3; ++ u8 reserved_1 : 1; ++ u8 direction : 1; ++ ++ u8 status; ++ ++ u8 reserved_2; ++ ++ u8 reset_vf : 1; ++ u8 reserved_3 : 6; ++ u8 owner_is_pf : 1; ++ }; ++}; /* HW DATA */ ++ ++#define SMC_APERTURE_BITS 256 ++#define SMC_BASIC_UNIT (sizeof(u32)) ++#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) ++#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) ++ ++static int mana_smc_poll_register(void __iomem *base, bool reset) ++{ ++ void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT; ++ u32 last_dword; ++ int i; ++ ++ /* Poll the hardware for the ownership bit. This should be pretty fast, ++ * but let's do it in a loop just in case the hardware or the PF ++ * driver are temporarily busy. ++ */ ++ for (i = 0; i < 20 * 1000; i++) { ++ last_dword = readl(ptr); ++ ++ /* shmem reads as 0xFFFFFFFF in the reset case */ ++ if (reset && last_dword == SHMEM_VF_RESET_STATE) ++ return 0; ++ ++ /* If bit_31 is set, the PF currently owns the SMC. 
*/ ++ if (!(last_dword & BIT(31))) ++ return 0; ++ ++ usleep_range(1000, 2000); ++ } ++ ++ return -ETIMEDOUT; ++} ++ ++static int mana_smc_read_response(struct shm_channel *sc, u32 msg_type, ++ u32 msg_version, bool reset_vf) ++{ ++ void __iomem *base = sc->base; ++ union smc_proto_hdr hdr; ++ int err; ++ ++ /* Wait for PF to respond. */ ++ err = mana_smc_poll_register(base, reset_vf); ++ if (err) ++ return err; ++ ++ hdr.as_uint32 = readl(base + SMC_LAST_DWORD * SMC_BASIC_UNIT); ++ ++ if (reset_vf && hdr.as_uint32 == SHMEM_VF_RESET_STATE) ++ return 0; ++ ++ /* Validate protocol fields from the PF driver */ ++ if (hdr.msg_type != msg_type || hdr.msg_version > msg_version || ++ hdr.direction != SMC_MSG_DIRECTION_RESPONSE) { ++ dev_err(sc->dev, "Wrong SMC response 0x%x, type=%d, ver=%d\n", ++ hdr.as_uint32, msg_type, msg_version); ++ return -EPROTO; ++ } ++ ++ /* Validate the operation result */ ++ if (hdr.status != 0) { ++ dev_err(sc->dev, "SMC operation failed: 0x%x\n", hdr.status); ++ return -EPROTO; ++ } ++ ++ return 0; ++} ++ ++void mana_smc_init(struct shm_channel *sc, struct device *dev, ++ void __iomem *base) ++{ ++ sc->dev = dev; ++ sc->base = base; ++} ++ ++int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, ++ u64 cq_addr, u64 rq_addr, u64 sq_addr, ++ u32 eq_msix_index) ++{ ++ union smc_proto_hdr *hdr; ++ u16 all_addr_h4bits = 0; ++ u16 frame_addr_seq = 0; ++ u64 frame_addr = 0; ++ u8 shm_buf[32]; ++ u64 *shmem; ++ u32 *dword; ++ u8 *ptr; ++ int err; ++ int i; ++ ++ /* Ensure VF already has possession of shared memory */ ++ err = mana_smc_poll_register(sc->base, false); ++ if (err) { ++ dev_err(sc->dev, "Timeout when setting up HWC: %d\n", err); ++ return err; ++ } ++ ++ if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || ++ !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) ++ return -EINVAL; ++ ++ if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) ++ return -EINVAL; ++ ++ /* Scheme for packing four addresses and extra info into 256 bits. ++ * ++ * Addresses must be page frame aligned, so only frame address bits ++ * are transferred. ++ * ++ * 52-bit frame addresses are split into the lower 48 bits and upper ++ * 4 bits. Lower 48 bits of 4 address are written sequentially from ++ * the start of the 256-bit shared memory region followed by 16 bits ++ * containing the upper 4 bits of the 4 addresses in sequence. ++ * ++ * A 16 bit EQ vector number fills out the next-to-last 32-bit dword. ++ * ++ * The final 32-bit dword is used for protocol control information as ++ * defined in smc_proto_hdr. 
++ */ ++ ++ memset(shm_buf, 0, sizeof(shm_buf)); ++ ptr = shm_buf; ++ ++ /* EQ addr: low 48 bits of frame address */ ++ shmem = (u64 *)ptr; ++ frame_addr = PHYS_PFN(eq_addr); ++ *shmem = frame_addr & PAGE_FRAME_L48_MASK; ++ all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << ++ (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); ++ ptr += PAGE_FRAME_L48_WIDTH_BYTES; ++ ++ /* CQ addr: low 48 bits of frame address */ ++ shmem = (u64 *)ptr; ++ frame_addr = PHYS_PFN(cq_addr); ++ *shmem = frame_addr & PAGE_FRAME_L48_MASK; ++ all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << ++ (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); ++ ptr += PAGE_FRAME_L48_WIDTH_BYTES; ++ ++ /* RQ addr: low 48 bits of frame address */ ++ shmem = (u64 *)ptr; ++ frame_addr = PHYS_PFN(rq_addr); ++ *shmem = frame_addr & PAGE_FRAME_L48_MASK; ++ all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << ++ (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); ++ ptr += PAGE_FRAME_L48_WIDTH_BYTES; ++ ++ /* SQ addr: low 48 bits of frame address */ ++ shmem = (u64 *)ptr; ++ frame_addr = PHYS_PFN(sq_addr); ++ *shmem = frame_addr & PAGE_FRAME_L48_MASK; ++ all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << ++ (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); ++ ptr += PAGE_FRAME_L48_WIDTH_BYTES; ++ ++ /* High 4 bits of the four frame addresses */ ++ *((u16 *)ptr) = all_addr_h4bits; ++ ptr += sizeof(u16); ++ ++ /* EQ MSIX vector number */ ++ *((u16 *)ptr) = (u16)eq_msix_index; ++ ptr += sizeof(u16); ++ ++ /* 32-bit protocol header in final dword */ ++ *((u32 *)ptr) = 0; ++ ++ hdr = (union smc_proto_hdr *)ptr; ++ hdr->msg_type = SMC_MSG_TYPE_ESTABLISH_HWC; ++ hdr->msg_version = SMC_MSG_TYPE_ESTABLISH_HWC_VERSION; ++ hdr->direction = SMC_MSG_DIRECTION_REQUEST; ++ hdr->reset_vf = reset_vf; ++ ++ /* Write 256-message buffer to shared memory (final 32-bit write ++ * triggers HW to set possession bit to PF). ++ */ ++ dword = (u32 *)shm_buf; ++ for (i = 0; i < SMC_APERTURE_DWORDS; i++) ++ writel(*dword++, sc->base + i * SMC_BASIC_UNIT); ++ ++ /* Read shmem response (polling for VF possession) and validate. ++ * For setup, waiting for response on shared memory is not strictly ++ * necessary, since wait occurs later for results to appear in EQE's. ++ */ ++ err = mana_smc_read_response(sc, SMC_MSG_TYPE_ESTABLISH_HWC, ++ SMC_MSG_TYPE_ESTABLISH_HWC_VERSION, ++ reset_vf); ++ if (err) { ++ dev_err(sc->dev, "Error when setting up HWC: %d\n", err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf) ++{ ++ union smc_proto_hdr hdr = {}; ++ int err; ++ ++ /* Ensure already has possession of shared memory */ ++ err = mana_smc_poll_register(sc->base, false); ++ if (err) { ++ dev_err(sc->dev, "Timeout when tearing down HWC\n"); ++ return err; ++ } ++ ++ /* Set up protocol header for HWC destroy message */ ++ hdr.msg_type = SMC_MSG_TYPE_DESTROY_HWC; ++ hdr.msg_version = SMC_MSG_TYPE_DESTROY_HWC_VERSION; ++ hdr.direction = SMC_MSG_DIRECTION_REQUEST; ++ hdr.reset_vf = reset_vf; ++ ++ /* Write message in high 32 bits of 256-bit shared memory, causing HW ++ * to set possession bit to PF. ++ */ ++ writel(hdr.as_uint32, sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT); ++ ++ /* Read shmem response (polling for VF possession) and validate. ++ * For teardown, waiting for response is required to ensure hardware ++ * invalidates MST entries before software frees memory. 
++ */ ++ err = mana_smc_read_response(sc, SMC_MSG_TYPE_DESTROY_HWC, ++ SMC_MSG_TYPE_DESTROY_HWC_VERSION, ++ reset_vf); ++ if (err) { ++ dev_err(sc->dev, "Error when tearing down HWC: %d\n", err); ++ return err; ++ } ++ ++ return 0; ++} +diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.h b/drivers/net/ethernet/microsoft/mana/shm_channel.h +new file mode 100644 +index 000000000000..5199b41497ff +--- /dev/null ++++ b/drivers/net/ethernet/microsoft/mana/shm_channel.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ ++/* Copyright (c) 2021, Microsoft Corporation. */ ++ ++#ifndef _SHM_CHANNEL_H ++#define _SHM_CHANNEL_H ++ ++struct shm_channel { ++ struct device *dev; ++ void __iomem *base; ++}; ++ ++void mana_smc_init(struct shm_channel *sc, struct device *dev, ++ void __iomem *base); ++ ++int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, ++ u64 cq_addr, u64 rq_addr, u64 sq_addr, ++ u32 eq_msix_index); ++ ++int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf); ++ ++#endif /* _SHM_CHANNEL_H */ diff --git a/patches.suse/msft-hv-2337-net-mana-remove-redundant-initialization-of-variable.patch b/patches.suse/msft-hv-2337-net-mana-remove-redundant-initialization-of-variable.patch new file mode 100644 index 0000000..99810b2 --- /dev/null +++ b/patches.suse/msft-hv-2337-net-mana-remove-redundant-initialization-of-variable.patch @@ -0,0 +1,31 @@ +From: Colin Ian King +Date: Tue, 20 Apr 2021 13:27:30 +0100 +Patch-mainline: v5.13-rc1 +Subject: net: mana: remove redundant initialization of variable err +Git-commit: 55cdc26a91ac270887583945aef2bd460a2805f7 +References: jsc#SLE-18779, bsc#1185727 + +The variable err is being initialized with a value that is +never read and it is being updated later with a new value. The +initialization is redundant and can be removed + +Addresses-Coverity: ("Unused value") +Signed-off-by: Colin Ian King +Signed-off-by: David S. 
Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -55,7 +55,7 @@ static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, + const struct gdma_resp_hdr *resp_msg) + { + struct hwc_caller_ctx *ctx; +- int err = -EPROTO; ++ int err; + + if (!test_bit(resp_msg->response.hwc_msg_id, + hwc->inflight_msg_res.map)) { diff --git a/patches.suse/msft-hv-2340-net-mana-fix-PCI_HYPERV-dependency.patch b/patches.suse/msft-hv-2340-net-mana-fix-PCI_HYPERV-dependency.patch new file mode 100644 index 0000000..c7b1495 --- /dev/null +++ b/patches.suse/msft-hv-2340-net-mana-fix-PCI_HYPERV-dependency.patch @@ -0,0 +1,43 @@ +From: Arnd Bergmann +Date: Thu, 22 Apr 2021 15:34:34 +0200 +Patch-mainline: v5.13-rc1 +Subject: net: mana: fix PCI_HYPERV dependency +Git-commit: 45b102dd81491e30ac7596b5515856141f99319f +References: jsc#SLE-18779, bsc#1185727 + +The MANA driver causes a build failure in some configurations when +it selects an unavailable symbol: + +WARNING: unmet direct dependencies detected for PCI_HYPERV + Depends on [n]: PCI [=y] && X86_64 [=y] && HYPERV [=n] && PCI_MSI [=y] && PCI_MSI_IRQ_DOMAIN [=y] && SYSFS [=y] + Selected by [y]: + - MICROSOFT_MANA [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_MICROSOFT [=y] && PCI_MSI [=y] && X86_64 [=y] +drivers/pci/controller/pci-hyperv.c: In function 'hv_irq_unmask': +drivers/pci/controller/pci-hyperv.c:1217:9: error: implicit declaration of function 'hv_set_msi_entry_from_desc' [-Werror=implicit-function-declaration] + 1217 | hv_set_msi_entry_from_desc(¶ms->int_entry.msi_entry, msi_desc); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~ + +A PCI driver should never depend on a particular host bridge +implementation in the first place, but if we have this dependency +it's better to express it as a 'depends on' rather than 'select'. + +Signed-off-by: Arnd Bergmann +Reviewed-by: Dexuan Cui +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig +--- a/drivers/net/ethernet/microsoft/Kconfig ++++ b/drivers/net/ethernet/microsoft/Kconfig +@@ -18,7 +18,7 @@ if NET_VENDOR_MICROSOFT + config MICROSOFT_MANA + tristate "Microsoft Azure Network Adapter (MANA) support" + depends on PCI_MSI && X86_64 +- select PCI_HYPERV ++ depends on PCI_HYPERV + help + This driver supports Microsoft Azure Network Adapter (MANA). + So far, the driver is only supported on X86_64. 
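A quick aside on the msft-hv-2337 hunk above: the pattern Coverity reports as an "Unused value" is an ordinary dead store, where an initializer is overwritten on every path before it is ever read. Below is a minimal user-space sketch of that pattern; the program and the lookup() helper are invented for illustration and are not part of the driver.

#include <errno.h>
#include <stdio.h>

static int lookup(int id)
{
	return (id == 42) ? 0 : -ENOENT;
}

int main(void)
{
	int err = -EPROTO;	/* dead store: never read before the next line */

	err = lookup(42);	/* every path assigns err here, so the
				 * initializer above can simply be dropped */
	printf("err = %d\n", err);
	return 0;
}

Dropping the initializer is purely a readability and static-analysis cleanup; it is safe only because later code always assigns err before reading it, which is exactly what the patch description asserts for mana_hwc_handle_resp().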
diff --git a/patches.suse/msft-hv-2341-net-mana-Use-int-to-check-the-return-value-of-mana_g.patch b/patches.suse/msft-hv-2341-net-mana-Use-int-to-check-the-return-value-of-mana_g.patch new file mode 100644 index 0000000..1e527b7 --- /dev/null +++ b/patches.suse/msft-hv-2341-net-mana-Use-int-to-check-the-return-value-of-mana_g.patch @@ -0,0 +1,46 @@ +From: Dexuan Cui +Date: Thu, 22 Apr 2021 13:08:16 -0700 +Patch-mainline: v5.13-rc1 +Subject: net: mana: Use int to check the return value of mana_gd_poll_cq() +Git-commit: d90a94680bc0a8069d93282bc5f2966d00b9c4a4 +References: jsc#SLE-18779, bsc#1185727 + +mana_gd_poll_cq() may return -1 if an overflow error is detected (this +should never happen unless there is a bug in the driver or the hardware). + +Fix the type of the variable "comp_read" by using int rather than u32. + +Reported-by: Dan Carpenter +Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") +Signed-off-by: Dexuan Cui +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +- + drivers/net/ethernet/microsoft/mana/mana_en.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -283,7 +283,7 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) + struct hwc_rx_oob comp_data = {}; + struct gdma_comp *completions; + struct hwc_cq *hwc_cq = ctx; +- u32 comp_read, i; ++ int comp_read, i; + + WARN_ON_ONCE(hwc_cq->gdma_cq != q_self); + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1061,7 +1061,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + static void mana_poll_rx_cq(struct mana_cq *cq) + { + struct gdma_comp *comp = cq->gdma_comp_buf; +- u32 comp_read, i; ++ int comp_read, i; + + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); diff --git a/patches.suse/msft-hv-2342-hv_netvsc-Make-netvsc-VF-binding-check-both-MAC-and-.patch b/patches.suse/msft-hv-2342-hv_netvsc-Make-netvsc-VF-binding-check-both-MAC-and-.patch new file mode 100644 index 0000000..fad1c93 --- /dev/null +++ b/patches.suse/msft-hv-2342-hv_netvsc-Make-netvsc-VF-binding-check-both-MAC-and-.patch @@ -0,0 +1,61 @@ +From: Dexuan Cui +Date: Fri, 23 Apr 2021 18:12:35 -0700 +Patch-mainline: v5.13-rc1 +Subject: hv_netvsc: Make netvsc/VF binding check both MAC and serial number +Git-commit: 64ff412ad41fe3a5bf759ff4844dc1382176485c +References: jsc#SLE-18779, bsc#1185727 + +Currently the netvsc/VF binding logic only checks the PCI serial number. + +The Microsoft Azure Network Adapter (MANA) supports multiple net_device +interfaces (each such interface is called a "vPort", and has its unique +MAC address) which are backed by the same VF PCI device, so the binding +logic should check both the MAC address and the PCI serial number. + +The change should not break any other existing VF drivers, because +Hyper-V NIC SR-IOV implementation requires the netvsc network +interface and the VF network interface have the same MAC address. 
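To make the binding rule described above concrete: a netvsc interface and a VF interface should be paired only when both the PCI serial number and the permanent MAC address agree, since with MANA several vPorts (each with its own MAC) can sit behind one VF PCI device. The following is a self-contained user-space sketch of that check, using invented types (struct nic_id, netvsc_vf_match) rather than the kernel structures; the actual change is the get_netvsc_byslot() hunk that follows.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct nic_id {
	uint32_t serial;	/* PCI serial number of the backing VF */
	uint8_t addr[6];	/* permanent MAC address */
	size_t addr_len;
};

/* Pair the two NICs only if serial number AND MAC address both match. */
static bool netvsc_vf_match(const struct nic_id *netvsc,
			    const struct nic_id *vf)
{
	if (netvsc->serial != vf->serial)
		return false;
	if (netvsc->addr_len != vf->addr_len)
		return false;
	return memcmp(netvsc->addr, vf->addr, netvsc->addr_len) == 0;
}

int main(void)
{
	struct nic_id netvsc = { .serial = 7, .addr = {0, 1, 2, 3, 4, 5}, .addr_len = 6 };
	struct nic_id vf     = { .serial = 7, .addr = {0, 1, 2, 3, 4, 5}, .addr_len = 6 };

	return netvsc_vf_match(&netvsc, &vf) ? 0 : 1;
}

Matching on the serial number alone was enough while one VF backed exactly one netvsc interface; with MANA the MAC comparison is what disambiguates multiple vPorts behind the same VF.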
+ +Co-developed-by: Haiyang Zhang +Signed-off-by: Haiyang Zhang +Co-developed-by: Shachar Raindel +Signed-off-by: Shachar Raindel +Acked-by: Stephen Hemminger +Signed-off-by: Dexuan Cui +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/hyperv/netvsc_drv.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -2297,6 +2297,7 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) + { + struct device *parent = vf_netdev->dev.parent; + struct net_device_context *ndev_ctx; ++ struct net_device *ndev; + struct pci_dev *pdev; + u32 serial; + +@@ -2319,8 +2320,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) + if (!ndev_ctx->vf_alloc) + continue; + +- if (ndev_ctx->vf_serial == serial) +- return hv_get_drvdata(ndev_ctx->device_ctx); ++ if (ndev_ctx->vf_serial != serial) ++ continue; ++ ++ ndev = hv_get_drvdata(ndev_ctx->device_ctx); ++ if (ndev->addr_len != vf_netdev->addr_len || ++ memcmp(ndev->perm_addr, vf_netdev->perm_addr, ++ ndev->addr_len) != 0) ++ continue; ++ ++ return ndev; ++ + } + + netdev_notice(vf_netdev, diff --git a/patches.suse/msft-hv-2344-net-mana-Use-struct_size-in-kzalloc.patch b/patches.suse/msft-hv-2344-net-mana-Use-struct_size-in-kzalloc.patch new file mode 100644 index 0000000..43fa1d8 --- /dev/null +++ b/patches.suse/msft-hv-2344-net-mana-Use-struct_size-in-kzalloc.patch @@ -0,0 +1,34 @@ +From: "Gustavo A. R. Silva" +Date: Thu, 13 May 2021 17:49:14 -0500 +Patch-mainline: v5.14-rc1 +Subject: net: mana: Use struct_size() in kzalloc() +Git-commit: ea89c862f01e02ec459932c7c3113fa37aedd09a +References: jsc#SLE-18779, bsc#1185727 + +Make use of the struct_size() helper instead of an open-coded version, +in order to avoid any potential type mistakes or integer overflows +that, in the worst scenario, could lead to heap overflows. + +This code was detected with the help of Coccinelle and, audited and +fixed manually. + +Signed-off-by: Gustavo A. R. Silva +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1387,8 +1387,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + + gc = gd->gdma_context; + +- rxq = kzalloc(sizeof(*rxq) + +- RX_BUFFERS_PER_QUEUE * sizeof(struct mana_recv_buf_oob), ++ rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), + GFP_KERNEL); + if (!rxq) + return NULL; diff --git a/patches.suse/msft-hv-2430-net-mana-Move-NAPI-from-EQ-to-CQ.patch b/patches.suse/msft-hv-2430-net-mana-Move-NAPI-from-EQ-to-CQ.patch new file mode 100644 index 0000000..2ef1961 --- /dev/null +++ b/patches.suse/msft-hv-2430-net-mana-Move-NAPI-from-EQ-to-CQ.patch @@ -0,0 +1,460 @@ +From: Haiyang Zhang +Date: Tue, 24 Aug 2021 09:45:59 -0700 +Patch-mainline: v5.15-rc1 +Subject: net: mana: Move NAPI from EQ to CQ +Git-commit: e1b5683ff62e7b328317aec08869495992053e9d +References: jsc#SLE-18779, bsc#1185727 + +The existing code has NAPI threads polling on EQ directly. 
To prepare +for EQ sharing among vPorts, move NAPI from EQ to CQ so that one EQ +can serve multiple CQs from different vPorts. + +The "arm bit" is only set when CQ processing is completed to reduce +the number of EQ entries, which in turn reduce the number of interrupts +on EQ. + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/gdma.h | 9 +- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 55 +----------- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +- + drivers/net/ethernet/microsoft/mana/mana.h | 11 ++- + drivers/net/ethernet/microsoft/mana/mana_en.c | 100 +++++++++++++--------- + 5 files changed, 74 insertions(+), 103 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -239,10 +239,8 @@ struct gdma_event { + + struct gdma_queue; + +-#define CQE_POLLING_BUFFER 512 + struct mana_eq { + struct gdma_queue *eq; +- struct gdma_comp cqe_poll[CQE_POLLING_BUFFER]; + }; + + typedef void gdma_eq_callback(void *context, struct gdma_queue *q, +@@ -291,11 +289,6 @@ struct gdma_queue { + unsigned int msix_index; + + u32 log2_throttle_limit; +- +- /* NAPI data */ +- struct napi_struct napi; +- int work_done; +- int budget; + } eq; + + struct { +@@ -406,7 +399,7 @@ void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); + + int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); + +-void mana_gd_arm_cq(struct gdma_queue *cq); ++void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit); + + struct gdma_wqe { + u32 reserved :24; +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -267,7 +267,7 @@ void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) + queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); + } + +-void mana_gd_arm_cq(struct gdma_queue *cq) ++void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) + { + struct gdma_context *gc = cq->gdma_dev->gdma_context; + +@@ -276,7 +276,7 @@ void mana_gd_arm_cq(struct gdma_queue *cq) + u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, +- head, SET_ARM_BIT); ++ head, arm_bit); + } + + static void mana_gd_process_eqe(struct gdma_queue *eq) +@@ -339,7 +339,6 @@ static void mana_gd_process_eq_events(void *arg) + struct gdma_queue *eq = arg; + struct gdma_context *gc; + struct gdma_eqe *eqe; +- unsigned int arm_bit; + u32 head, num_eqe; + int i; + +@@ -370,48 +369,16 @@ static void mana_gd_process_eq_events(void *arg) + eq->head++; + } + +- /* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. 
*/ +- if (mana_gd_is_hwc(eq->gdma_dev)) { +- arm_bit = SET_ARM_BIT; +- } else if (eq->eq.work_done < eq->eq.budget && +- napi_complete_done(&eq->eq.napi, eq->eq.work_done)) { +- arm_bit = SET_ARM_BIT; +- } else { +- arm_bit = 0; +- } +- + head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id, +- head, arm_bit); +-} +- +-static int mana_poll(struct napi_struct *napi, int budget) +-{ +- struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi); +- +- eq->eq.work_done = 0; +- eq->eq.budget = budget; +- +- mana_gd_process_eq_events(eq); +- +- return min(eq->eq.work_done, budget); +-} +- +-static void mana_gd_schedule_napi(void *arg) +-{ +- struct gdma_queue *eq = arg; +- struct napi_struct *napi; +- +- napi = &eq->eq.napi; +- napi_schedule_irqoff(napi); ++ head, SET_ARM_BIT); + } + + static int mana_gd_register_irq(struct gdma_queue *queue, + const struct gdma_queue_spec *spec) + { + struct gdma_dev *gd = queue->gdma_dev; +- bool is_mana = mana_gd_is_mana(gd); + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; +@@ -442,20 +409,11 @@ static int mana_gd_register_irq(struct gdma_queue *queue, + + gic = &gc->irq_contexts[msi_index]; + +- if (is_mana) { +- netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll, +- NAPI_POLL_WEIGHT); +- napi_enable(&queue->eq.napi); +- } +- + WARN_ON(gic->handler || gic->arg); + + gic->arg = queue; + +- if (is_mana) +- gic->handler = mana_gd_schedule_napi; +- else +- gic->handler = mana_gd_process_eq_events; ++ gic->handler = mana_gd_process_eq_events; + + return 0; + } +@@ -549,11 +507,6 @@ static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, + + mana_gd_deregiser_irq(queue); + +- if (mana_gd_is_mana(queue->gdma_dev)) { +- napi_disable(&queue->eq.napi); +- netif_napi_del(&queue->eq.napi); +- } +- + if (queue->eq.disable_needed) + mana_gd_disable_queue(queue); + } +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -304,7 +304,7 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) + &comp_data); + } + +- mana_gd_arm_cq(q_self); ++ mana_gd_ring_cq(q_self, SET_ARM_BIT); + } + + static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq) +diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h +--- a/drivers/net/ethernet/microsoft/mana/mana.h ++++ b/drivers/net/ethernet/microsoft/mana/mana.h +@@ -225,6 +225,8 @@ struct mana_tx_comp_oob { + + struct mana_rxq; + ++#define CQE_POLLING_BUFFER 512 ++ + struct mana_cq { + struct gdma_queue *gdma_cq; + +@@ -244,8 +246,13 @@ struct mana_cq { + */ + struct mana_txq *txq; + +- /* Pointer to a buffer which the CQ handler can copy the CQE's into. */ +- struct gdma_comp *gdma_comp_buf; ++ /* Buffer which the CQ handler can copy the CQE's into. 
*/ ++ struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER]; ++ ++ /* NAPI data */ ++ struct napi_struct napi; ++ int work_done; ++ int budget; + }; + + #define GDMA_MAX_RQE_SGES 15 +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -696,14 +696,6 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + resp.hdr.status); + } + +-static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf) +-{ +- int i; +- +- for (i = 0; i < CQE_POLLING_BUFFER; i++) +- memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp)); +-} +- + static void mana_destroy_eq(struct gdma_context *gc, + struct mana_port_context *apc) + { +@@ -746,8 +738,6 @@ static int mana_create_eq(struct mana_port_context *apc) + spec.eq.ndev = apc->ndev; + + for (i = 0; i < apc->num_queues; i++) { +- mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll); +- + err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq); + if (err) + goto out; +@@ -790,7 +780,6 @@ static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) + + static void mana_poll_tx_cq(struct mana_cq *cq) + { +- struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent; + struct gdma_comp *completions = cq->gdma_comp_buf; + struct gdma_posted_wqe_info *wqe_info; + unsigned int pkt_transmitted = 0; +@@ -812,6 +801,9 @@ static void mana_poll_tx_cq(struct mana_cq *cq) + comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, + CQE_POLLING_BUFFER); + ++ if (comp_read < 1) ++ return; ++ + for (i = 0; i < comp_read; i++) { + struct mana_tx_comp_oob *cqe_oob; + +@@ -861,7 +853,7 @@ static void mana_poll_tx_cq(struct mana_cq *cq) + + mana_unmap_skb(skb, apc); + +- napi_consume_skb(skb, gdma_eq->eq.budget); ++ napi_consume_skb(skb, cq->budget); + + pkt_transmitted++; + } +@@ -890,6 +882,8 @@ static void mana_poll_tx_cq(struct mana_cq *cq) + + if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) + WARN_ON_ONCE(1); ++ ++ cq->work_done = pkt_transmitted; + } + + static void mana_post_pkt_rxq(struct mana_rxq *rxq) +@@ -918,17 +912,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, + struct mana_stats *rx_stats = &rxq->stats; + struct net_device *ndev = rxq->ndev; + uint pkt_len = cqe->ppi[0].pkt_len; +- struct mana_port_context *apc; + u16 rxq_idx = rxq->rxq_idx; + struct napi_struct *napi; +- struct gdma_queue *eq; + struct sk_buff *skb; + u32 hash_value; + +- apc = netdev_priv(ndev); +- eq = apc->eqs[rxq_idx].eq; +- eq->eq.work_done++; +- napi = &eq->eq.napi; ++ rxq->rx_cq.work_done++; ++ napi = &rxq->rx_cq.napi; + + if (!buf_va) { + ++ndev->stats.rx_dropped; +@@ -1081,6 +1071,7 @@ static void mana_poll_rx_cq(struct mana_cq *cq) + static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) + { + struct mana_cq *cq = context; ++ u8 arm_bit; + + WARN_ON_ONCE(cq->gdma_cq != gdma_queue); + +@@ -1089,7 +1080,33 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) + else + mana_poll_tx_cq(cq); + +- mana_gd_arm_cq(gdma_queue); ++ if (cq->work_done < cq->budget && ++ napi_complete_done(&cq->napi, cq->work_done)) { ++ arm_bit = SET_ARM_BIT; ++ } else { ++ arm_bit = 0; ++ } ++ ++ mana_gd_ring_cq(gdma_queue, arm_bit); ++} ++ ++static int mana_poll(struct napi_struct *napi, int budget) ++{ ++ struct mana_cq *cq = container_of(napi, struct mana_cq, napi); ++ ++ cq->work_done = 0; ++ cq->budget = budget; ++ ++ mana_cq_handler(cq, 
cq->gdma_cq); ++ ++ return min(cq->work_done, budget); ++} ++ ++static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) ++{ ++ struct mana_cq *cq = context; ++ ++ napi_schedule_irqoff(&cq->napi); + } + + static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) +@@ -1114,12 +1131,18 @@ static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) + + static void mana_destroy_txq(struct mana_port_context *apc) + { ++ struct napi_struct *napi; + int i; + + if (!apc->tx_qp) + return; + + for (i = 0; i < apc->num_queues; i++) { ++ napi = &apc->tx_qp[i].tx_cq.napi; ++ napi_synchronize(napi); ++ napi_disable(napi); ++ netif_napi_del(napi); ++ + mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); + + mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); +@@ -1186,7 +1209,6 @@ static int mana_create_txq(struct mana_port_context *apc, + + /* Create SQ's CQ */ + cq = &apc->tx_qp[i].tx_cq; +- cq->gdma_comp_buf = apc->eqs[i].cqe_poll; + cq->type = MANA_CQ_TYPE_TX; + + cq->txq = txq; +@@ -1195,7 +1217,7 @@ static int mana_create_txq(struct mana_port_context *apc, + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; +- spec.cq.callback = mana_cq_handler; ++ spec.cq.callback = mana_schedule_napi; + spec.cq.parent_eq = apc->eqs[i].eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); +@@ -1237,7 +1259,10 @@ static int mana_create_txq(struct mana_port_context *apc, + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + +- mana_gd_arm_cq(cq->gdma_cq); ++ netif_tx_napi_add(net, &cq->napi, mana_poll, NAPI_POLL_WEIGHT); ++ napi_enable(&cq->napi); ++ ++ mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); + } + + return 0; +@@ -1246,21 +1271,6 @@ out: + return err; + } + +-static void mana_napi_sync_for_rx(struct mana_rxq *rxq) +-{ +- struct net_device *ndev = rxq->ndev; +- struct mana_port_context *apc; +- u16 rxq_idx = rxq->rxq_idx; +- struct napi_struct *napi; +- struct gdma_queue *eq; +- +- apc = netdev_priv(ndev); +- eq = apc->eqs[rxq_idx].eq; +- napi = &eq->eq.napi; +- +- napi_synchronize(napi); +-} +- + static void mana_destroy_rxq(struct mana_port_context *apc, + struct mana_rxq *rxq, bool validate_state) + +@@ -1268,13 +1278,19 @@ static void mana_destroy_rxq(struct mana_port_context *apc, + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; ++ struct napi_struct *napi; + int i; + + if (!rxq) + return; + ++ napi = &rxq->rx_cq.napi; ++ + if (validate_state) +- mana_napi_sync_for_rx(rxq); ++ napi_synchronize(napi); ++ ++ napi_disable(napi); ++ netif_napi_del(napi); + + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + +@@ -1418,7 +1434,6 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + + /* Create RQ's CQ */ + cq = &rxq->rx_cq; +- cq->gdma_comp_buf = eq->cqe_poll; + cq->type = MANA_CQ_TYPE_RX; + cq->rxq = rxq; + +@@ -1426,7 +1441,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; +- spec.cq.callback = mana_cq_handler; ++ spec.cq.callback = mana_schedule_napi; + spec.cq.parent_eq = eq->eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); +@@ -1466,7 +1481,10 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + +- mana_gd_arm_cq(cq->gdma_cq); ++ netif_napi_add(ndev, &cq->napi, mana_poll, 1); ++ 
napi_enable(&cq->napi); ++ ++ mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); + out: + if (!err) + return rxq; diff --git a/patches.suse/msft-hv-2431-net-mana-Add-support-for-EQ-sharing.patch b/patches.suse/msft-hv-2431-net-mana-Add-support-for-EQ-sharing.patch new file mode 100644 index 0000000..8e173b6 --- /dev/null +++ b/patches.suse/msft-hv-2431-net-mana-Add-support-for-EQ-sharing.patch @@ -0,0 +1,397 @@ +From: Haiyang Zhang +Date: Tue, 24 Aug 2021 09:46:00 -0700 +Patch-mainline: v5.15-rc1 +Subject: net: mana: Add support for EQ sharing +Git-commit: 1e2d0824a9c3de5371a46601aac0c14af5c0a50a +References: jsc#SLE-18779, bsc#1185727 + +The existing code uses (1 + #vPorts * #Queues) MSIXs, which may exceed +the device limit. + +Support EQ sharing, so that multiple vPorts (NICs) can share the same +set of MSIXs. + +And, report the EQ-sharing capability bit to the host, which means the +host can potentially offer more vPorts and queues to the VM. + +Also update the resource limit checking and error handling for better +robustness. + +Now, we support up to 256 virtual ports per VF (it was 16/VF), and +support up to 64 queues per vPort (it was 16). + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/gdma.h | 23 +++++--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 31 +++++++---- + drivers/net/ethernet/microsoft/mana/mana.h | 18 +++---- + drivers/net/ethernet/microsoft/mana/mana_en.c | 62 +++++++++++----------- + 4 files changed, 78 insertions(+), 56 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h +--- a/drivers/net/ethernet/microsoft/mana/gdma.h ++++ b/drivers/net/ethernet/microsoft/mana/gdma.h +@@ -312,9 +312,6 @@ struct gdma_queue_spec { + void *context; + + unsigned long log2_throttle_limit; +- +- /* Only used by the MANA device. 
*/ +- struct net_device *ndev; + } eq; + + struct { +@@ -489,16 +486,28 @@ enum { + GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, + }; + ++#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) ++ ++#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT ++ ++#define GDMA_DRV_CAP_FLAGS2 0 ++ ++#define GDMA_DRV_CAP_FLAGS3 0 ++ ++#define GDMA_DRV_CAP_FLAGS4 0 ++ + struct gdma_verify_ver_req { + struct gdma_req_hdr hdr; + + /* Mandatory fields required for protocol establishment */ + u64 protocol_ver_min; + u64 protocol_ver_max; +- u64 drv_cap_flags1; +- u64 drv_cap_flags2; +- u64 drv_cap_flags3; +- u64 drv_cap_flags4; ++ ++ /* Gdma Driver Capability Flags */ ++ u64 gd_drv_cap_flags1; ++ u64 gd_drv_cap_flags2; ++ u64 gd_drv_cap_flags3; ++ u64 gd_drv_cap_flags4; + + /* Advisory fields */ + u64 drv_ver; +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -67,6 +67,10 @@ static int mana_gd_query_max_resources(struct pci_dev *pdev) + if (gc->max_num_queues > resp.max_rq) + gc->max_num_queues = resp.max_rq; + ++ /* The Hardware Channel (HWC) used 1 MSI-X */ ++ if (gc->max_num_queues > gc->num_msix_usable - 1) ++ gc->max_num_queues = gc->num_msix_usable - 1; ++ + return 0; + } + +@@ -384,28 +388,31 @@ static int mana_gd_register_irq(struct gdma_queue *queue, + struct gdma_resource *r; + unsigned int msi_index; + unsigned long flags; +- int err; ++ struct device *dev; ++ int err = 0; + + gc = gd->gdma_context; + r = &gc->msix_resource; ++ dev = gc->dev; + + spin_lock_irqsave(&r->lock, flags); + + msi_index = find_first_zero_bit(r->map, r->size); +- if (msi_index >= r->size) { ++ if (msi_index >= r->size || msi_index >= gc->num_msix_usable) { + err = -ENOSPC; + } else { + bitmap_set(r->map, msi_index, 1); + queue->eq.msix_index = msi_index; +- err = 0; + } + + spin_unlock_irqrestore(&r->lock, flags); + +- if (err) +- return err; ++ if (err) { ++ dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u", ++ err, msi_index, r->size, gc->num_msix_usable); + +- WARN_ON(msi_index >= gc->num_msix_usable); ++ return err; ++ } + + gic = &gc->irq_contexts[msi_index]; + +@@ -836,6 +843,11 @@ int mana_gd_verify_vf_version(struct pci_dev *pdev) + req.protocol_ver_min = GDMA_PROTOCOL_FIRST; + req.protocol_ver_max = GDMA_PROTOCOL_LAST; + ++ req.gd_drv_cap_flags1 = GDMA_DRV_CAP_FLAGS1; ++ req.gd_drv_cap_flags2 = GDMA_DRV_CAP_FLAGS2; ++ req.gd_drv_cap_flags3 = GDMA_DRV_CAP_FLAGS3; ++ req.gd_drv_cap_flags4 = GDMA_DRV_CAP_FLAGS4; ++ + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n", +@@ -1154,10 +1166,8 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) + if (max_queues_per_port > MANA_MAX_NUM_QUEUES) + max_queues_per_port = MANA_MAX_NUM_QUEUES; + +- max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV; +- + /* Need 1 interrupt for the Hardware communication Channel (HWC) */ +- max_irqs++; ++ max_irqs = max_queues_per_port + 1; + + nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX); + if (nvec < 0) +@@ -1244,6 +1254,9 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + int bar = 0; + int err; + ++ /* Each port has 2 CQs, each CQ has at most 1 EQE at a time */ ++ BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); ++ + err = pci_enable_device(pdev); + if 
(err) + return -ENXIO; +diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h +--- a/drivers/net/ethernet/microsoft/mana/mana.h ++++ b/drivers/net/ethernet/microsoft/mana/mana.h +@@ -46,7 +46,7 @@ enum TRI_STATE { + #define EQ_SIZE (8 * PAGE_SIZE) + #define LOG2_EQ_THROTTLE 3 + +-#define MAX_PORTS_IN_MANA_DEV 16 ++#define MAX_PORTS_IN_MANA_DEV 256 + + struct mana_stats { + u64 packets; +@@ -322,6 +322,8 @@ struct mana_context { + + u16 num_ports; + ++ struct mana_eq *eqs; ++ + struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; + }; + +@@ -331,8 +333,6 @@ struct mana_port_context { + + u8 mac_addr[ETH_ALEN]; + +- struct mana_eq *eqs; +- + enum TRI_STATE rss_state; + + mana_handle_t default_rxobj; +@@ -402,11 +402,11 @@ enum mana_command_code { + struct mana_query_device_cfg_req { + struct gdma_req_hdr hdr; + +- /* Driver Capability flags */ +- u64 drv_cap_flags1; +- u64 drv_cap_flags2; +- u64 drv_cap_flags3; +- u64 drv_cap_flags4; ++ /* MANA Nic Driver Capability flags */ ++ u64 mn_drv_cap_flags1; ++ u64 mn_drv_cap_flags2; ++ u64 mn_drv_cap_flags3; ++ u64 mn_drv_cap_flags4; + + u32 proto_major_ver; + u32 proto_minor_ver; +@@ -523,7 +523,7 @@ struct mana_cfg_rx_steer_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + +-#define MANA_MAX_NUM_QUEUES 16 ++#define MANA_MAX_NUM_QUEUES 64 + + #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -696,56 +696,56 @@ static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + resp.hdr.status); + } + +-static void mana_destroy_eq(struct gdma_context *gc, +- struct mana_port_context *apc) ++static void mana_destroy_eq(struct mana_context *ac) + { ++ struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct gdma_queue *eq; + int i; + +- if (!apc->eqs) ++ if (!ac->eqs) + return; + +- for (i = 0; i < apc->num_queues; i++) { +- eq = apc->eqs[i].eq; ++ for (i = 0; i < gc->max_num_queues; i++) { ++ eq = ac->eqs[i].eq; + if (!eq) + continue; + + mana_gd_destroy_queue(gc, eq); + } + +- kfree(apc->eqs); +- apc->eqs = NULL; ++ kfree(ac->eqs); ++ ac->eqs = NULL; + } + +-static int mana_create_eq(struct mana_port_context *apc) ++static int mana_create_eq(struct mana_context *ac) + { +- struct gdma_dev *gd = apc->ac->gdma_dev; ++ struct gdma_dev *gd = ac->gdma_dev; ++ struct gdma_context *gc = gd->gdma_context; + struct gdma_queue_spec spec = {}; + int err; + int i; + +- apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq), +- GFP_KERNEL); +- if (!apc->eqs) ++ ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq), ++ GFP_KERNEL); ++ if (!ac->eqs) + return -ENOMEM; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = EQ_SIZE; + spec.eq.callback = NULL; +- spec.eq.context = apc->eqs; ++ spec.eq.context = ac->eqs; + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; +- spec.eq.ndev = apc->ndev; + +- for (i = 0; i < apc->num_queues; i++) { +- err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq); ++ for (i = 0; i < gc->max_num_queues; i++) { ++ err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); + if (err) + goto out; + } + + return 0; + out: +- mana_destroy_eq(gd->gdma_context, apc); ++ mana_destroy_eq(ac); + return err; + } + +@@ -1157,7 +1157,8 @@ static void mana_destroy_txq(struct mana_port_context *apc) + static int 
mana_create_txq(struct mana_port_context *apc, + struct net_device *net) + { +- struct gdma_dev *gd = apc->ac->gdma_dev; ++ struct mana_context *ac = apc->ac; ++ struct gdma_dev *gd = ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; +@@ -1218,7 +1219,7 @@ static int mana_create_txq(struct mana_port_context *apc, + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_schedule_napi; +- spec.cq.parent_eq = apc->eqs[i].eq; ++ spec.cq.parent_eq = ac->eqs[i].eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) +@@ -1502,12 +1503,13 @@ out: + static int mana_add_rx_queues(struct mana_port_context *apc, + struct net_device *ndev) + { ++ struct mana_context *ac = apc->ac; + struct mana_rxq *rxq; + int err = 0; + int i; + + for (i = 0; i < apc->num_queues; i++) { +- rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev); ++ rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); + if (!rxq) { + err = -ENOMEM; + goto out; +@@ -1619,16 +1621,11 @@ reset_apc: + int mana_alloc_queues(struct net_device *ndev) + { + struct mana_port_context *apc = netdev_priv(ndev); +- struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + +- err = mana_create_eq(apc); +- if (err) +- return err; +- + err = mana_create_vport(apc, ndev); + if (err) +- goto destroy_eq; ++ return err; + + err = netif_set_real_num_tx_queues(ndev, apc->num_queues); + if (err) +@@ -1654,8 +1651,6 @@ int mana_alloc_queues(struct net_device *ndev) + + destroy_vport: + mana_destroy_vport(apc); +-destroy_eq: +- mana_destroy_eq(gd->gdma_context, apc); + return err; + } + +@@ -1732,8 +1727,6 @@ static int mana_dealloc_queues(struct net_device *ndev) + + mana_destroy_vport(apc); + +- mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc); +- + return 0; + } + +@@ -1786,7 +1779,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, + apc->ac = ac; + apc->ndev = ndev; + apc->max_queues = gc->max_num_queues; +- apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES); ++ apc->num_queues = gc->max_num_queues; + apc->port_handle = INVALID_MANA_HANDLE; + apc->port_idx = port_idx; + +@@ -1857,6 +1850,10 @@ int mana_probe(struct gdma_dev *gd) + ac->num_ports = 1; + gd->driver_data = ac; + ++ err = mana_create_eq(ac); ++ if (err) ++ goto out; ++ + err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, + MANA_MICRO_VERSION, &ac->num_ports); + if (err) +@@ -1906,6 +1903,9 @@ void mana_remove(struct gdma_dev *gd) + + free_netdev(ndev); + } ++ ++ mana_destroy_eq(ac); ++ + out: + mana_gd_deregister_device(gd); + gd->driver_data = NULL; diff --git a/patches.suse/msft-hv-2432-net-mana-Add-WARN_ON_ONCE-in-case-of-CQE-read-overfl.patch b/patches.suse/msft-hv-2432-net-mana-Add-WARN_ON_ONCE-in-case-of-CQE-read-overfl.patch new file mode 100644 index 0000000..76397ff --- /dev/null +++ b/patches.suse/msft-hv-2432-net-mana-Add-WARN_ON_ONCE-in-case-of-CQE-read-overfl.patch @@ -0,0 +1,30 @@ +From: Haiyang Zhang +Date: Tue, 24 Aug 2021 09:46:01 -0700 +Patch-mainline: v5.15-rc1 +Subject: net: mana: Add WARN_ON_ONCE in case of CQE read overflow +Git-commit: c1a3e9f98dde4782290d2c4871e42b7e76ad5593 +References: jsc#SLE-18779, bsc#1185727 + +This is not an expected case normally. +Add WARN_ON_ONCE in case of CQE read overflow, instead of failing +silently. + +Signed-off-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/gdma_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -1093,7 +1093,7 @@ static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) + + new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK; + /* Return -1 if overflow detected. */ +- if (owner_bits != new_bits) ++ if (WARN_ON_ONCE(owner_bits != new_bits)) + return -1; + + comp->wq_num = cqe->cqe_info.wq_num; diff --git a/patches.suse/msft-hv-2437-net-mana-Prefer-struct_size-over-open-coded-arithmet.patch b/patches.suse/msft-hv-2437-net-mana-Prefer-struct_size-over-open-coded-arithmet.patch new file mode 100644 index 0000000..414e248 --- /dev/null +++ b/patches.suse/msft-hv-2437-net-mana-Prefer-struct_size-over-open-coded-arithmet.patch @@ -0,0 +1,42 @@ +From: Len Baker +Date: Sat, 11 Sep 2021 12:28:18 +0200 +Patch-mainline: v5.15-rc2 +Subject: net: mana: Prefer struct_size over open coded arithmetic +Git-commit: f11ee2ad25b22c2ee587045dd6999434375532f7 +References: jsc#SLE-18779, bsc#1185727 + +As noted in the "Deprecated Interfaces, Language Features, Attributes, +and Conventions" documentation [1], size calculations (especially +multiplication) should not be performed in memory allocator (or similar) +function arguments due to the risk of them overflowing. This could lead +to values wrapping around and a smaller allocation being made than the +caller was expecting. Using those allocations could lead to linear +overflows of heap memory and other misbehaviors. + +So, use the struct_size() helper to do the arithmetic instead of the +argument "size + count * size" in the kzalloc() function. + +[1] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments + +Signed-off-by: Len Baker +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/hw_channel.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c +--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c ++++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c +@@ -398,9 +398,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, + int err; + u16 i; + +- dma_buf = kzalloc(sizeof(*dma_buf) + +- q_depth * sizeof(struct hwc_work_request), +- GFP_KERNEL); ++ dma_buf = kzalloc(struct_size(dma_buf, reqs, q_depth), GFP_KERNEL); + if (!dma_buf) + return -ENOMEM; + diff --git a/patches.suse/net-mana-Fix-a-memory-leak-in-an-error-handling-path.patch b/patches.suse/net-mana-Fix-a-memory-leak-in-an-error-handling-path.patch new file mode 100644 index 0000000..4b3ff0b --- /dev/null +++ b/patches.suse/net-mana-Fix-a-memory-leak-in-an-error-handling-path.patch @@ -0,0 +1,35 @@ +From: Christophe JAILLET +Date: Sun, 20 Jun 2021 15:43:28 +0200 +Patch-mainline: v5.14 +Subject: net: mana: Fix a memory leak in an error handling path in +Git-commit: b90788459cd6d140171b046f0b37fad341ade0a3 +References: jsc#SLE-18779, bsc#1185727 + +If this test fails we must free some resources as in all the other error +handling paths of this function. 
+ +Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") +Signed-off-by: Christophe JAILLET +Reviewed-by: Dexuan Cui +Signed-off-by: David S. Miller +Acked-by: Olaf Hering +--- + drivers/net/ethernet/microsoft/mana/mana_en.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1230,8 +1230,10 @@ static int mana_create_txq(struct mana_port_context *apc, + + cq->gdma_id = cq->gdma_cq->id; + +- if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) +- return -EINVAL; ++ if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { ++ err = -EINVAL; ++ goto out; ++ } + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + diff --git a/patches.suse/powerpc-mm-radix-Free-PUD-table-when-freeing-pagetab.patch b/patches.suse/powerpc-mm-radix-Free-PUD-table-when-freeing-pagetab.patch new file mode 100644 index 0000000..d7c1179 --- /dev/null +++ b/patches.suse/powerpc-mm-radix-Free-PUD-table-when-freeing-pagetab.patch @@ -0,0 +1,58 @@ +From 9ce8853b4a735c8115f55ac0e9c2b27a4c8f80b5 Mon Sep 17 00:00:00 2001 +From: Bharata B Rao +Date: Thu, 9 Jul 2020 18:49:23 +0530 +Subject: [PATCH] powerpc/mm/radix: Free PUD table when freeing pagetable + +References: bsc#1065729 +Patch-mainline: v5.9-rc1 +Git-commit: 9ce8853b4a735c8115f55ac0e9c2b27a4c8f80b5 + +remove_pagetable() isn't freeing PUD table. This causes memory +leak during memory unplug. Fix this. + +Fixes: 4b5d62ca17a1 ("powerpc/mm: add radix__remove_section_mapping()") +Signed-off-by: Bharata B Rao +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20200709131925.922266-3-aneesh.kumar@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/mm/pgtable-radix.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c +--- a/arch/powerpc/mm/pgtable-radix.c ++++ b/arch/powerpc/mm/pgtable-radix.c +@@ -707,6 +707,21 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud) + pud_clear(pud); + } + ++static void free_pud_table(pud_t *pud_start, pgd_t *pgd) ++{ ++ pud_t *pud; ++ int i; ++ ++ for (i = 0; i < PTRS_PER_PUD; i++) { ++ pud = pud_start + i; ++ if (!pud_none(*pud)) ++ return; ++ } ++ ++ pud_free(&init_mm, pud_start); ++ pgd_clear(pgd); ++} ++ + struct change_mapping_params { + pte_t *pte; + unsigned long start; +@@ -881,6 +896,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) + + pud_base = (pud_t *)pgd_page_vaddr(*pgd); + remove_pud_table(pud_base, addr, next); ++ free_pud_table(pud_base, pgd); + } + + spin_unlock(&init_mm.page_table_lock); +-- +2.31.1 + diff --git a/patches.suse/powerpc-numa-Early-request-for-home-node-associativi.patch b/patches.suse/powerpc-numa-Early-request-for-home-node-associativi.patch new file mode 100644 index 0000000..647478f --- /dev/null +++ b/patches.suse/powerpc-numa-Early-request-for-home-node-associativi.patch @@ -0,0 +1,107 @@ +From dc909d8b0c9c0d2c42dc1cf34216c4830f639f7b Mon Sep 17 00:00:00 2001 +From: Srikar Dronamraju +Date: Wed, 29 Jan 2020 19:23:00 +0530 +Subject: [PATCH] powerpc/numa: Early request for home node associativity + +References: bsc#1190914 +Patch-mainline: v5.7-rc1 +Git-commit: dc909d8b0c9c0d2c42dc1cf34216c4830f639f7b + +Currently the kernel detects if its running on a shared lpar platform +and requests 
home node associativity before the scheduler sched_domains +are setup. However between the time NUMA setup is initialized and the +request for home node associativity, workqueue initializes its per node +cpumask. The per node workqueue possible cpumask may turn invalid +after home node associativity resulting in weird situations like +workqueue possible cpumask being a subset of workqueue online cpumask. + +This can be fixed by requesting home node associativity earlier just +before NUMA setup. However at the NUMA setup time, kernel may not be in +a position to detect if its running on a shared lpar platform. So +request for home node associativity and if the request fails, fallback +on the device tree property. + +Signed-off-by: Srikar Dronamraju +Reported-by: Abdul Haleem +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20200129135301.24739-5-srikar@linux.vnet.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/mm/numa.c | 41 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 40 insertions(+), 1 deletion(-) + +diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c +--- a/arch/powerpc/mm/numa.c ++++ b/arch/powerpc/mm/numa.c +@@ -461,6 +461,41 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb) + return nid; + } + ++#ifdef CONFIG_PPC_SPLPAR ++static int vphn_get_nid(long lcpu) ++{ ++ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; ++ long rc, hwid; ++ ++ /* ++ * On a shared lpar, device tree will not have node associativity. ++ * At this time lppaca, or its __old_status field may not be ++ * updated. Hence kernel cannot detect if its on a shared lpar. So ++ * request an explicit associativity irrespective of whether the ++ * lpar is shared or dedicated. Use the device tree property as a ++ * fallback. cpu_to_phys_id is only valid between ++ * smp_setup_cpu_maps() and smp_setup_pacas(). ++ */ ++ if (firmware_has_feature(FW_FEATURE_VPHN)) { ++ if (cpu_to_phys_id) ++ hwid = cpu_to_phys_id[lcpu]; ++ else ++ hwid = get_hard_smp_processor_id(lcpu); ++ ++ rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); ++ if (rc == H_SUCCESS) ++ return associativity_to_nid(associativity); ++ } ++ ++ return NUMA_NO_NODE; ++} ++#else ++static int vphn_get_nid(long unused) ++{ ++ return NUMA_NO_NODE; ++} ++#endif /* CONFIG_PPC_SPLPAR */ ++ + /* + * Figure out to which domain a cpu belongs and stick it there. + * Return the id of the domain used. 
+@@ -485,6 +520,10 @@ static int numa_setup_cpu(unsigned long lcpu) + return nid; + } + ++ nid = vphn_get_nid(lcpu); ++ if (nid != NUMA_NO_NODE) ++ goto out_present; ++ + cpu = of_get_cpu_node(lcpu, NULL); + + if (!cpu) { +@@ -496,6 +535,7 @@ static int numa_setup_cpu(unsigned long lcpu) + } + + nid = of_node_to_nid_single(cpu); ++ of_node_put(cpu); + + out_present: + if (nid < 0 || !node_possible(nid)) +@@ -515,7 +555,6 @@ static int numa_setup_cpu(unsigned long lcpu) + nid = first_online_node; + + map_cpu_to_node(lcpu, nid); +- of_node_put(cpu); + out: + return nid; + } +-- +2.31.1 + diff --git a/patches.suse/powerpc-numa-remove-timed_topology_update.patch b/patches.suse/powerpc-numa-remove-timed_topology_update.patch index db37077..368b869 100644 --- a/patches.suse/powerpc-numa-remove-timed_topology_update.patch +++ b/patches.suse/powerpc-numa-remove-timed_topology_update.patch @@ -47,8 +47,8 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1124,14 +1124,6 @@ u64 memory_hotplug_max(void) - return rc; - } + #ifdef CONFIG_PPC_SPLPAR + static int topology_inited; -/* - * Change polling interval for associativity changes. diff --git a/patches.suse/powerpc-numa-remove-unreachable-topology-timer-code.patch b/patches.suse/powerpc-numa-remove-unreachable-topology-timer-code.patch index 8f22086..01931f0 100644 --- a/patches.suse/powerpc-numa-remove-unreachable-topology-timer-code.patch +++ b/patches.suse/powerpc-numa-remove-unreachable-topology-timer-code.patch @@ -35,7 +35,7 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c -static int topology_timer_secs = 1; static int topology_inited; - static long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) + /* @@ -1143,15 +1139,6 @@ static int topology_inited; */ int timed_topology_update(int nsecs) diff --git a/patches.suse/powerpc-numa-remove-vphn_enabled-and-prrn_enabled-in.patch b/patches.suse/powerpc-numa-remove-vphn_enabled-and-prrn_enabled-in.patch index 317a2fe..5de5b95 100644 --- a/patches.suse/powerpc-numa-remove-vphn_enabled-and-prrn_enabled-in.patch +++ b/patches.suse/powerpc-numa-remove-vphn_enabled-and-prrn_enabled-in.patch @@ -21,6 +21,7 @@ Acked-by: Michal Suchanek 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c +index 8415481a7f13..8749d7f2b1a6 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1130,8 +1130,6 @@ struct topology_update_data { @@ -31,7 +32,7 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c -static const int prrn_enabled; static int topology_inited; - static long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) + /* @@ -1292,7 +1290,7 @@ int numa_update_cpu_topology(bool cpus_locked) struct device *dev; int weight, new_nid, i = 0; @@ -51,5 +52,5 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c static int topology_update_init(void) -- -2.26.2 +2.31.1 diff --git a/patches.suse/powerpc-numa-stub-out-numa_update_cpu_topology.patch b/patches.suse/powerpc-numa-stub-out-numa_update_cpu_topology.patch index 5eaa977..3fdcbe8 100644 --- a/patches.suse/powerpc-numa-stub-out-numa_update_cpu_topology.patch +++ b/patches.suse/powerpc-numa-stub-out-numa_update_cpu_topology.patch @@ -28,8 +28,8 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1122,14 +1122,6 @@ u64 memory_hotplug_max(void) - #include "vphn.h" - + /* Virtual Processor Home Node (VPHN) support */ + 
#ifdef CONFIG_PPC_SPLPAR -struct topology_update_data { - struct topology_update_data *next; - unsigned int cpu; @@ -40,7 +40,7 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c -static cpumask_t cpu_associativity_changes_mask; static int topology_inited; - static long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) + /* @@ -1219,192 +1211,9 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } @@ -236,5 +236,5 @@ diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c int arch_update_cpu_topology(void) -- -2.26.2 +2.31.1 diff --git a/patches.suse/powerpc-perf-Drop-the-case-of-returning-0-as-instruc.patch b/patches.suse/powerpc-perf-Drop-the-case-of-returning-0-as-instruc.patch new file mode 100644 index 0000000..ff7698a --- /dev/null +++ b/patches.suse/powerpc-perf-Drop-the-case-of-returning-0-as-instruc.patch @@ -0,0 +1,38 @@ +From cc90c6742ef5b438f4cb86029d7a794bd0a44a06 Mon Sep 17 00:00:00 2001 +From: Kajol Jain +Date: Wed, 18 Aug 2021 22:45:55 +0530 +Subject: [PATCH] powerpc/perf: Drop the case of returning 0 as instruction + pointer + +References: bsc#1065729 +Patch-mainline: v5.15-rc1 +Git-commit: cc90c6742ef5b438f4cb86029d7a794bd0a44a06 + +Drop the case of returning 0 as instruction pointer since kernel +never executes at 0 and userspace almost never does either. + +Fixes: e6878835ac47 ("powerpc/perf: Sample only if SIAR-Valid bit is set in P7+") +Signed-off-by: Kajol Jain +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210818171556.36912-2-kjain@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/core-book3s.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 3a782a35100d..9bb466d2d99e 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -2270,8 +2270,6 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) + return regs->nip; + } else if (use_siar && siar_valid(regs)) + return siar + perf_ip_adjust(regs); +- else if (use_siar) +- return 0; // no valid instruction pointer + else + return regs->nip; + } +-- +2.31.1 + diff --git a/patches.suse/powerpc-perf-Fix-crash-in-perf_instruction_pointer-w.patch b/patches.suse/powerpc-perf-Fix-crash-in-perf_instruction_pointer-w.patch new file mode 100644 index 0000000..0ee6081 --- /dev/null +++ b/patches.suse/powerpc-perf-Fix-crash-in-perf_instruction_pointer-w.patch @@ -0,0 +1,68 @@ +From 60b7ed54a41b550d50caf7f2418db4a7e75b5bdc Mon Sep 17 00:00:00 2001 +From: Athira Rajeev +Date: Thu, 17 Jun 2021 13:55:06 -0400 +Subject: [PATCH] powerpc/perf: Fix crash in perf_instruction_pointer() when + ppmu is not set + +References: bsc#1065729 +Patch-mainline: v5.13-rc7 +Git-commit: 60b7ed54a41b550d50caf7f2418db4a7e75b5bdc + +On systems without any specific PMU driver support registered, running +perf record causes Oops. 
+ +The relevant portion from call trace: + + BUG: Kernel NULL pointer dereference on read at 0x00000040 + Faulting instruction address: 0xc0021f0c + Oops: Kernel access of bad area, sig: 11 [#1] + BE PAGE_SIZE=4K PREEMPT CMPCPRO + SAF3000 DIE NOTIFICATION + CPU: 0 PID: 442 Comm: null_syscall Not tainted 5.13.0-rc6-s3k-dev-01645-g7649ee3d2957 #5164 + NIP: c0021f0c LR: c00e8ad8 CTR: c00d8a5c + NIP perf_instruction_pointer+0x10/0x60 + LR perf_prepare_sample+0x344/0x674 + Call Trace: + perf_prepare_sample+0x7c/0x674 (unreliable) + perf_event_output_forward+0x3c/0x94 + __perf_event_overflow+0x74/0x14c + perf_swevent_hrtimer+0xf8/0x170 + __hrtimer_run_queues.constprop.0+0x160/0x318 + hrtimer_interrupt+0x148/0x3b0 + timer_interrupt+0xc4/0x22c + Decrementer_virt+0xb8/0xbc + +During perf record session, perf_instruction_pointer() is called to +capture the sample IP. This function in core-book3s accesses +ppmu->flags. If a platform specific PMU driver is not registered, ppmu +is set to NULL and accessing its members results in a crash. Fix this +crash by checking if ppmu is set. + +Fixes: 2ca13a4cc56c ("powerpc/perf: Use regs->nip when SIAR is zero") +Cc: stable@vger.kernel.org # v5.11+ +Reported-by: Christophe Leroy +Signed-off-by: Athira Rajeev +Tested-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/1623952506-1431-1-git-send-email-atrajeev@linux.vnet.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/core-book3s.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 16d4d1b6a1ff..51622411a7cc 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) + bool use_siar = regs_use_siar(regs); + unsigned long siar = mfspr(SPRN_SIAR); + +- if (ppmu->flags & PPMU_P10_DD1) { ++ if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { + if (siar) + return siar; + else +-- +2.31.1 + diff --git a/patches.suse/powerpc-perf-Fix-the-check-for-SIAR-value.patch b/patches.suse/powerpc-perf-Fix-the-check-for-SIAR-value.patch new file mode 100644 index 0000000..3c96411 --- /dev/null +++ b/patches.suse/powerpc-perf-Fix-the-check-for-SIAR-value.patch @@ -0,0 +1,56 @@ +From 3c69a5f22223fa3e312689ec218b5059784d49d7 Mon Sep 17 00:00:00 2001 +From: Kajol Jain +Date: Wed, 18 Aug 2021 22:45:56 +0530 +Subject: [PATCH] powerpc/perf: Fix the check for SIAR value + +References: bsc#1065729 +Patch-mainline: v5.15-rc1 +Git-commit: 3c69a5f22223fa3e312689ec218b5059784d49d7 + +Incase of random sampling, there can be scenarios where +Sample Instruction Address Register(SIAR) may not latch +to the sampled instruction and could result in +the value of 0. In these scenarios it is preferred to +return regs->nip. These corner cases are seen in the +previous generation (p9) also. + +Patch adds the check for SIAR value along with regs_use_siar +and siar_valid checks so that the function will return +regs->nip incase SIAR is zero. + +Patch drops the code under PPMU_P10_DD1 flag check +which handles SIAR 0 case only for Power10 DD1. 
+ +Fixes: 2ca13a4cc56c9 ("powerpc/perf: Use regs->nip when SIAR is zero") +Signed-off-by: Kajol Jain +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210818171556.36912-3-kjain@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/core-book3s.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 9bb466d2d99e..73e62e9b179b 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -2260,15 +2260,9 @@ unsigned long perf_misc_flags(struct pt_regs *regs) + */ + unsigned long perf_instruction_pointer(struct pt_regs *regs) + { +- bool use_siar = regs_use_siar(regs); + unsigned long siar = mfspr(SPRN_SIAR); + +- if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { +- if (siar) +- return siar; +- else +- return regs->nip; +- } else if (use_siar && siar_valid(regs)) ++ if (regs_use_siar(regs) && siar_valid(regs) && siar) + return siar + perf_ip_adjust(regs); + else + return regs->nip; +-- +2.31.1 + diff --git a/patches.suse/powerpc-perf-Use-regs-nip-when-SIAR-is-zero.patch b/patches.suse/powerpc-perf-Use-regs-nip-when-SIAR-is-zero.patch new file mode 100644 index 0000000..320ac01 --- /dev/null +++ b/patches.suse/powerpc-perf-Use-regs-nip-when-SIAR-is-zero.patch @@ -0,0 +1,65 @@ +From 2ca13a4cc56c920a6c9fc8ee45d02bccacd7f46c Mon Sep 17 00:00:00 2001 +From: Madhavan Srinivasan +Date: Wed, 21 Oct 2020 14:23:29 +0530 +Subject: [PATCH] powerpc/perf: Use regs->nip when SIAR is zero + +References: bsc#1065729 +Patch-mainline: v5.11-rc1 +Git-commit: 2ca13a4cc56c920a6c9fc8ee45d02bccacd7f46c + +In power10 DD1, there is an issue where the SIAR (Sampled Instruction +Address Register) is not latching to the sampled address during random +sampling. This results in value of 0s in the SIAR. Add a check to use +regs->nip when SIAR is zero. + +Signed-off-by: Madhavan Srinivasan +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20201021085329.384535-5-maddy@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/core-book3s.c | 21 +++++++++++++++++---- + 1 file changed, 17 insertions(+), 4 deletions(-) + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 6be0349e01ad..3c8c6ce634c5 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -263,9 +263,16 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs) + * DD1. 
+ */ + if (marked && (ppmu->flags & PPMU_P10_DD1)) { +- if (is_kernel_addr(mfspr(SPRN_SIAR))) +- return PERF_RECORD_MISC_KERNEL; +- return PERF_RECORD_MISC_USER; ++ unsigned long siar = mfspr(SPRN_SIAR); ++ if (siar) { ++ if (is_kernel_addr(siar)) ++ return PERF_RECORD_MISC_KERNEL; ++ return PERF_RECORD_MISC_USER; ++ } else { ++ if (is_kernel_addr(regs->nip)) ++ return PERF_RECORD_MISC_KERNEL; ++ return PERF_RECORD_MISC_USER; ++ } + } + + /* +@@ -2199,8 +2206,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs) + unsigned long perf_instruction_pointer(struct pt_regs *regs) + { + bool use_siar = regs_use_siar(regs); ++ unsigned long siar = mfspr(SPRN_SIAR); + +- if (use_siar && siar_valid(regs)) ++ if (ppmu->flags & PPMU_P10_DD1) { ++ if (siar) ++ return siar; ++ else ++ return regs->nip; ++ } else if (use_siar && siar_valid(regs)) + return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + else if (use_siar) + return 0; // no valid instruction pointer +-- +2.31.1 + diff --git a/patches.suse/powerpc-perf-Use-stack-siar-instead-of-mfspr.patch b/patches.suse/powerpc-perf-Use-stack-siar-instead-of-mfspr.patch new file mode 100644 index 0000000..55ea7ca --- /dev/null +++ b/patches.suse/powerpc-perf-Use-stack-siar-instead-of-mfspr.patch @@ -0,0 +1,38 @@ +From b1643084d164cea0c107a39bcdf0119fc52619af Mon Sep 17 00:00:00 2001 +From: Kajol Jain +Date: Wed, 18 Aug 2021 22:45:54 +0530 +Subject: [PATCH] powerpc/perf: Use stack siar instead of mfspr + +References: bsc#1065729 +Patch-mainline: v5.15-rc1 +Git-commit: b1643084d164cea0c107a39bcdf0119fc52619af + +Minor optimization in the 'perf_instruction_pointer' function code by +making use of stack siar instead of mfspr. + +Fixes: 75382aa72f06 ("powerpc/perf: Move code to select SIAR or pt_regs into perf_read_regs") +Signed-off-by: Kajol Jain +Tested-by: Nageswara R Sastry +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210818171556.36912-1-kjain@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/core-book3s.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 91203ed9d0ff..3a782a35100d 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -2269,7 +2269,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) + else + return regs->nip; + } else if (use_siar && siar_valid(regs)) +- return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); ++ return siar + perf_ip_adjust(regs); + else if (use_siar) + return 0; // no valid instruction pointer + else +-- +2.31.1 + diff --git a/patches.suse/powerpc-perf-Use-the-address-from-SIAR-register-to-s.patch b/patches.suse/powerpc-perf-Use-the-address-from-SIAR-register-to-s.patch new file mode 100644 index 0000000..aeaa609 --- /dev/null +++ b/patches.suse/powerpc-perf-Use-the-address-from-SIAR-register-to-s.patch @@ -0,0 +1,70 @@ +From d9f7088dd6d8859f385565ca8acd2681e1f700f9 Mon Sep 17 00:00:00 2001 +From: Athira Rajeev +Date: Wed, 21 Oct 2020 14:23:27 +0530 +Subject: [PATCH] powerpc/perf: Use the address from SIAR register to set + cpumode flags + +References: bsc#1065729 +Patch-mainline: v5.11-rc1 +Git-commit: d9f7088dd6d8859f385565ca8acd2681e1f700f9 + +While setting the processor mode for any sample, perf_get_misc_flags() +expects the privilege level to differentiate the userspace and kernel +address. On power10 DD1, there is an issue that causes MSR_HV MSR_PR +bits of Sampled Instruction Event Register (SIER) not to be set for +marked events. 
Hence add a check to use the address in SIAR (Sampled +Instruction Address Register) to identify the privilege level. + +Signed-off-by: Athira Rajeev +Signed-off-by: Madhavan Srinivasan +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20201021085329.384535-3-maddy@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/core-book3s.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c +index 3b62dbb94796..6be0349e01ad 100644 +--- a/arch/powerpc/perf/core-book3s.c ++++ b/arch/powerpc/perf/core-book3s.c +@@ -250,10 +250,24 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs) + static inline u32 perf_get_misc_flags(struct pt_regs *regs) + { + bool use_siar = regs_use_siar(regs); ++ unsigned long mmcra = regs->dsisr; ++ int marked = mmcra & MMCRA_SAMPLE_ENABLE; + + if (!use_siar) + return perf_flags_from_msr(regs); + ++ /* ++ * Check the address in SIAR to identify the ++ * privilege levels since the SIER[MSR_HV, MSR_PR] ++ * bits are not set for marked events in power10 ++ * DD1. ++ */ ++ if (marked && (ppmu->flags & PPMU_P10_DD1)) { ++ if (is_kernel_addr(mfspr(SPRN_SIAR))) ++ return PERF_RECORD_MISC_KERNEL; ++ return PERF_RECORD_MISC_USER; ++ } ++ + /* + * If we don't have flags in MMCRA, rather than using + * the MSR, we intuit the flags from the address in +-- +2.31.1 + +diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h +index e60aeb46d6a0..ae78f3e58b5e 100644 +--- a/arch/powerpc/include/asm/perf_event_server.h ++++ b/arch/powerpc/include/asm/perf_event_server.h +@@ -73,6 +73,7 @@ struct power_pmu { + #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ + #define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ + #define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */ ++#define PPMU_P10_DD1 0 /* P10 not supported, ignore */ + + /* + * Values for flags to get_alternatives() diff --git a/patches.suse/powerpc-perf-hv-gpci-Fix-counter-value-parsing.patch b/patches.suse/powerpc-perf-hv-gpci-Fix-counter-value-parsing.patch new file mode 100644 index 0000000..774f7a3 --- /dev/null +++ b/patches.suse/powerpc-perf-hv-gpci-Fix-counter-value-parsing.patch @@ -0,0 +1,72 @@ +From f9addd85fbfacf0d155e83dbee8696d6df5ed0c7 Mon Sep 17 00:00:00 2001 +From: Kajol Jain +Date: Fri, 13 Aug 2021 13:51:58 +0530 +Subject: [PATCH] powerpc/perf/hv-gpci: Fix counter value parsing + +References: bsc#1065729 +Patch-mainline: v5.15-rc1 +Git-commit: f9addd85fbfacf0d155e83dbee8696d6df5ed0c7 + +H_GetPerformanceCounterInfo (0xF080) hcall returns the counter data in +the result buffer. Result buffer has specific format defined in the PAPR +specification. One of the fields is counter offset and width of the +counter data returned. + +Counter data are returned in a unsigned char array in big endian byte +order. To get the final counter data, the values must be left shifted +byte at a time. But commit 220a0c609ad17 ("powerpc/perf: Add support for +the hv gpci (get performance counter info) interface") made the shifting +bitwise and also assumed little endian order. Because of that, hcall +counters values are reported incorrectly. 
+ +In particular this can lead to counters go backwards which messes up the +counter prev vs now calculation and leads to huge counter value +reporting: + + #: perf stat -e hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + -C 0 -I 1000 + time counts unit events + 1.000078854 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 2.000213293 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 3.000320107 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 4.000428392 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 5.000537864 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 6.000649087 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 7.000760312 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 8.000865218 16,448 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 9.000978985 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 10.001088891 16,384 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 11.001201435 0 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + 12.001307937 18,446,744,073,709,535,232 hv_gpci/system_tlbie_count_and_time_tlbie_instructions_issued/ + +Fix the shifting logic to correct match the format, ie. read bytes in +big endian order. + +Fixes: e4f226b1580b ("powerpc/perf/hv-gpci: Increase request buffer size") +Cc: stable@vger.kernel.org # v4.6+ +Reported-by: Nageswara R Sastry +Signed-off-by: Kajol Jain +Tested-by: Nageswara R Sastry +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210813082158.429023-1-kjain@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/perf/hv-gpci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c +index d48413e28c39..c756228a081f 100644 +--- a/arch/powerpc/perf/hv-gpci.c ++++ b/arch/powerpc/perf/hv-gpci.c +@@ -175,7 +175,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, + */ + count = 0; + for (i = offset; i < offset + length; i++) +- count |= arg->bytes[i] << (i - offset); ++ count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); + + *value = count; + out: +-- +2.31.1 + diff --git a/patches.suse/powerpc-powernv-Fix-machine-check-reporting-of-async.patch b/patches.suse/powerpc-powernv-Fix-machine-check-reporting-of-async.patch new file mode 100644 index 0000000..0d201f6 --- /dev/null +++ b/patches.suse/powerpc-powernv-Fix-machine-check-reporting-of-async.patch @@ -0,0 +1,108 @@ +From 3729e0ec59a20825bd4c8c70996b2df63915e1dd Mon Sep 17 00:00:00 2001 +From: Nicholas Piggin +Date: Tue, 18 May 2021 00:03:55 +1000 +Subject: [PATCH] powerpc/powernv: Fix machine check reporting of async store + errors + +References: bsc#1065729 +Patch-mainline: v5.14-rc1 +Git-commit: 3729e0ec59a20825bd4c8c70996b2df63915e1dd + +POWER9 and POWER10 asynchronous machine checks due to stores have their +cause reported in SRR1 but SRR1[42] is set, which in other cases +indicates DSISR cause. + +Check for these cases and clear SRR1[42], so the cause matching uses +the i-side (SRR1) table. 
+ +Fixes: 7b9f71f974a1 ("powerpc/64s: POWER9 machine check handler") +Fixes: 201220bb0e8c ("powerpc/powernv: Machine check handler for POWER10") +Signed-off-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210517140355.2325406-1-npiggin@gmail.com +Acked-by: Michal Suchanek +--- + arch/powerpc/kernel/mce_power.c | 48 +++++++++++++++++++++++++++------ + 1 file changed, 40 insertions(+), 8 deletions(-) + +diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c +--- a/arch/powerpc/kernel/mce_power.c ++++ b/arch/powerpc/kernel/mce_power.c +@@ -481,11 +481,10 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, + MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + { 0, false, 0, 0, 0, 0 } }; + +-static int mce_handle_ierror(struct pt_regs *regs, ++static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, + const struct mce_ierror_table table[], + struct mce_error_info *mce_err, uint64_t *addr) + { +- uint64_t srr1 = regs->msr; + int handled = 0; + int i; + +@@ -695,18 +694,18 @@ static long mce_handle_ue_error(struct pt_regs *regs, + } + + static long mce_handle_error(struct pt_regs *regs, ++ unsigned long srr1, + const struct mce_derror_table dtable[], + const struct mce_ierror_table itable[]) + { + struct mce_error_info mce_err = { 0 }; + uint64_t addr; +- uint64_t srr1 = regs->msr; + long handled; + + if (SRR1_MC_LOADSTORE(srr1)) + handled = mce_handle_derror(regs, dtable, &mce_err, &addr); + else +- handled = mce_handle_ierror(regs, itable, &mce_err, &addr); ++ handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr); + + if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); +@@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) + /* P7 DD1 leaves top bits of DSISR undefined */ + regs->dsisr &= 0x0000ffff; + +- return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table); ++ return mce_handle_error(regs, regs->msr, ++ mce_p7_derror_table, mce_p7_ierror_table); + } + + long __machine_check_early_realmode_p8(struct pt_regs *regs) + { +- return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table); ++ return mce_handle_error(regs, regs->msr, ++ mce_p8_derror_table, mce_p8_ierror_table); + } + + long __machine_check_early_realmode_p9(struct pt_regs *regs) + { ++ unsigned long srr1 = regs->msr; ++ + /* + * On POWER9 DD2.1 and below, it's possible to get a machine check + * caused by a paste instruction where only DSISR bit 25 is set. This +@@ -746,5 +749,19 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs) + if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000) + return 1; + +- return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table); ++ /* ++ * Async machine check due to bad real address from store or foreign ++ * link time out comes with the load/store bit (PPC bit 42) set in ++ * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're ++ * directed to the ierror table so it will find the cause (which ++ * describes it correctly as a store error). 
++ */ ++ if (SRR1_MC_LOADSTORE(srr1) && ++ ((srr1 & 0x081c0000) == 0x08140000 || ++ (srr1 & 0x081c0000) == 0x08180000)) { ++ srr1 &= ~PPC_BIT(42); ++ } ++ ++ return mce_handle_error(regs, srr1, ++ mce_p9_derror_table, mce_p9_ierror_table); + } diff --git a/patches.suse/powerpc-pseries-Move-mm-book3s64-vphn.c-under-platfo.patch b/patches.suse/powerpc-pseries-Move-mm-book3s64-vphn.c-under-platfo.patch new file mode 100644 index 0000000..bbf6c58 --- /dev/null +++ b/patches.suse/powerpc-pseries-Move-mm-book3s64-vphn.c-under-platfo.patch @@ -0,0 +1,192 @@ +From 5a1ea4774ddc2c6bc3ba1415880091eccf1a901e Mon Sep 17 00:00:00 2001 +From: "Naveen N. Rao" +Date: Wed, 3 Jul 2019 22:33:59 +0530 +Subject: [PATCH] powerpc/pseries: Move mm/book3s64/vphn.c under + platforms/pseries/ + +References: bsc#1190914 +Patch-mainline: v5.3-rc1 +Git-commit: 5a1ea4774ddc2c6bc3ba1415880091eccf1a901e (partial) + +hcall_vphn() is specific to pseries and will be used in a subsequent +patch. So, move it to a more appropriate place under +arch/powerpc/platforms/pseries. Also merge vphn.h into lppaca.h +and update vphn selftest to use the new files. + +Signed-off-by: Naveen N. Rao +Signed-off-by: Michael Ellerman +Acked-by: Michal Suchanek +--- + arch/powerpc/include/asm/lppaca.h | 24 +++++++++++++++++++ + arch/powerpc/mm/book3s64/Makefile | 1 - + arch/powerpc/mm/book3s64/vphn.h | 24 ------------------- + arch/powerpc/mm/numa.c | 14 ----------- + arch/powerpc/platforms/pseries/Makefile | 1 + + .../{mm/book3s64 => platforms/pseries}/vphn.c | 20 ++++++++++++++-- + tools/testing/selftests/powerpc/vphn/Makefile | 2 +- + .../selftests/powerpc/vphn/asm/lppaca.h | 1 + + tools/testing/selftests/powerpc/vphn/vphn.c | 2 +- + tools/testing/selftests/powerpc/vphn/vphn.h | 1 - + 10 files changed, 46 insertions(+), 44 deletions(-) + delete mode 100644 arch/powerpc/mm/book3s64/vphn.h + rename arch/powerpc/{mm/book3s64 => platforms/pseries}/vphn.c (80%) + create mode 120000 tools/testing/selftests/powerpc/vphn/asm/lppaca.h + delete mode 120000 tools/testing/selftests/powerpc/vphn/vphn.h + +diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h +--- a/arch/powerpc/include/asm/lppaca.h ++++ b/arch/powerpc/include/asm/lppaca.h +@@ -18,6 +18,29 @@ + */ + #ifndef _ASM_POWERPC_LPPACA_H + #define _ASM_POWERPC_LPPACA_H ++ ++/* ++ * The below VPHN macros are outside the __KERNEL__ check since these are ++ * used for compiling the vphn selftest in userspace ++ */ ++ ++/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ ++#define VPHN_REGISTER_COUNT 6 ++ ++/* ++ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To ++ * form the complete property we have to add the length in the first cell. ++ */ ++#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) ++ ++/* ++ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags: ++ * 1 for retrieving associativity information for a guest cpu ++ * 2 for retrieving associativity information for a host/hypervisor cpu ++ */ ++#define VPHN_FLAG_VCPU 1 ++#define VPHN_FLAG_PCPU 2 ++ + #ifdef __KERNEL__ + + /* +@@ -160,6 +183,7 @@ extern struct kmem_cache *dtl_cache; + * called once for each DTL entry that gets processed. 
+ */ + extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); ++extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); + + #endif /* CONFIG_PPC_BOOK3S */ + #endif /* __KERNEL__ */ +diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/vphn.h +--- a/arch/powerpc/mm/vphn.h ++++ b/arch/powerpc/mm/vphn.h +@@ -1,23 +0,1 @@ ++#error This file should not be used anymore. +-#ifndef _ARCH_POWERPC_MM_VPHN_H_ +-#define _ARCH_POWERPC_MM_VPHN_H_ +- +-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ +-#define VPHN_REGISTER_COUNT 6 +- +-/* +- * 6 64-bit registers unpacked into up to 24 be32 associativity values. To +- * form the complete property we have to add the length in the first cell. +- */ +-#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) +- +-/* +- * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags: +- * 1 for retrieving associativity information for a guest cpu +- * 2 for retrieving associativity information for a host/hypervisor cpu +- */ +-#define VPHN_FLAG_VCPU 1 +-#define VPHN_FLAG_PCPU 2 +- +-extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked); +- +-#endif +diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c +--- a/arch/powerpc/mm/numa.c ++++ b/arch/powerpc/mm/numa.c +@@ -1059,9 +1059,6 @@ u64 memory_hotplug_max(void) + + /* Virtual Processor Home Node (VPHN) support */ + #ifdef CONFIG_PPC_SPLPAR +- +-#include "vphn.h" +- + struct topology_update_data { + struct topology_update_data *next; + unsigned int cpu; +@@ -1079,17 +1076,6 @@ static void reset_topology_timer(void); + static int topology_timer_secs = 1; + static int topology_inited; + +-static long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) +-{ +- long rc; +- long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; +- +- rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu); +- vphn_unpack_associativity(retbuf, associativity); +- +- return rc; +-} +- + /* + * Change polling interval for associativity changes. + */ +--- a/arch/powerpc/mm/vphn.c ++++ b/arch/powerpc/mm/vphn.c +@@ -1,5 +1,5 @@ + #include +-#include "vphn.h" ++#include + + /* + * The associativity domain numbers are returned from the hypervisor as a +@@ -21,7 +21,7 @@ + * + * Convert to the sequence they would appear in the ibm,associativity property. + */ +-int vphn_unpack_associativity(const long *packed, __be32 *unpacked) ++static int vphn_unpack_associativity(const long *packed, __be32 *unpacked) + { + __be64 be_packed[VPHN_REGISTER_COUNT]; + int i, nr_assoc_doms = 0; +@@ -70,3 +70,19 @@ int vphn_unpack_associativity(const long + + return nr_assoc_doms; + } ++ ++/* NOTE: This file is included by a selftest and built in userspace. 
*/ ++#ifdef __KERNEL__ ++#include ++ ++long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) ++{ ++ long rc; ++ long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; ++ ++ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu); ++ vphn_unpack_associativity(retbuf, associativity); ++ ++ return rc; ++} ++#endif +diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile +index f8ced26748f8..2ed787414e25 100644 +--- a/tools/testing/selftests/powerpc/vphn/Makefile ++++ b/tools/testing/selftests/powerpc/vphn/Makefile +@@ -1,6 +1,6 @@ + TEST_GEN_PROGS := test-vphn + +-CFLAGS += -m64 ++CFLAGS += -m64 -I$(CURDIR) + + include ../../lib.mk + +diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h +new file mode 100644 +index 000000000000..30777dc2968d +--- /dev/null ++++ b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h +@@ -0,0 +1 @@ ++#include "../../../../../../arch/powerpc/include/asm/lppaca.h" diff --git a/patches.suse/powerpc-pseries-dlpar-use-rtas_get_sensor.patch b/patches.suse/powerpc-pseries-dlpar-use-rtas_get_sensor.patch new file mode 100644 index 0000000..de55646 --- /dev/null +++ b/patches.suse/powerpc-pseries-dlpar-use-rtas_get_sensor.patch @@ -0,0 +1,46 @@ +From bfb0c9fcf5870036e54081b28cae2af5f9ee7088 Mon Sep 17 00:00:00 2001 +From: Nathan Lynch +Date: Mon, 3 May 2021 21:53:29 -0500 +Subject: [PATCH] powerpc/pseries/dlpar: use rtas_get_sensor() + +References: bsc#1065729 +Patch-mainline: v5.14-rc1 +Git-commit: bfb0c9fcf5870036e54081b28cae2af5f9ee7088 + +Instead of making bare calls to get-sensor-state, use +rtas_get_sensor(), which correctly handles busy and extended delay +statuses. + +Fixes: ab519a011caa ("powerpc/pseries: Kernel DLPAR Infrastructure") +Signed-off-by: Nathan Lynch +Reviewed-by: Laurent Dufour +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210504025329.1713878-1-nathanl@linux.ibm.com +Acked-by: Michal Suchanek +--- + arch/powerpc/platforms/pseries/dlpar.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c +--- a/arch/powerpc/platforms/pseries/dlpar.c ++++ b/arch/powerpc/platforms/pseries/dlpar.c +@@ -289,8 +289,7 @@ int dlpar_acquire_drc(u32 drc_index) + { + int dr_status, rc; + +- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, +- DR_ENTITY_SENSE, drc_index); ++ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); + if (rc || dr_status != DR_ENTITY_UNUSABLE) + return -1; + +@@ -311,8 +310,7 @@ int dlpar_release_drc(u32 drc_index) + { + int dr_status, rc; + +- rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status, +- DR_ENTITY_SENSE, drc_index); ++ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status); + if (rc || dr_status != DR_ENTITY_PRESENT) + return -1; + diff --git a/patches.suse/scsi-qla2xxx-Fix-spelling-mistakes-allloc-alloc.patch b/patches.suse/scsi-qla2xxx-Fix-spelling-mistakes-allloc-alloc.patch index eccc842..333843e 100644 --- a/patches.suse/scsi-qla2xxx-Fix-spelling-mistakes-allloc-alloc.patch +++ b/patches.suse/scsi-qla2xxx-Fix-spelling-mistakes-allloc-alloc.patch @@ -3,7 +3,7 @@ Date: Thu, 29 Jul 2021 09:24:13 +0100 Subject: scsi: qla2xxx: Fix spelling mistakes "allloc" -> "alloc" Patch-mainline: v5.15-rc1 Git-commit: 0f783c2d640ac03ad3bb3ba6b7a1287ddf18031d -References: bsc#1189384 +References: bsc#1189392 There are two spelling mistakes with the same triple 
l in alloc, one in a comment, the other in a ql_dbg() debug message. Fix them. diff --git a/patches.suse/scsi-qla2xxx-Fix-use-after-free-in-debug-code.patch b/patches.suse/scsi-qla2xxx-Fix-use-after-free-in-debug-code.patch index 3703aa5..b2d0f46 100644 --- a/patches.suse/scsi-qla2xxx-Fix-use-after-free-in-debug-code.patch +++ b/patches.suse/scsi-qla2xxx-Fix-use-after-free-in-debug-code.patch @@ -3,7 +3,7 @@ Date: Tue, 3 Aug 2021 18:56:25 +0300 Subject: scsi: qla2xxx: Fix use after free in debug code Patch-mainline: v5.15-rc1 Git-commit: e3d2612f583ba6e234cb7fe4559132c8f28905f1 -References: bsc#1189384 +References: bsc#1189392 The sp->free(sp); call frees "sp" and then the debug code dereferences it on the next line. Swap the order. diff --git a/patches.suse/scsi-qla2xxx-Remove-redundant-initialization-of-vari.patch b/patches.suse/scsi-qla2xxx-Remove-redundant-initialization-of-vari.patch index db34c05..2e912a5 100644 --- a/patches.suse/scsi-qla2xxx-Remove-redundant-initialization-of-vari.patch +++ b/patches.suse/scsi-qla2xxx-Remove-redundant-initialization-of-vari.patch @@ -3,7 +3,7 @@ Date: Wed, 4 Aug 2021 14:13:44 +0100 Subject: scsi: qla2xxx: Remove redundant initialization of variable num_cnt Patch-mainline: v5.15-rc1 Git-commit: 77d0f07abada8c9aeb54caba879a298a0b94c02a -References: bsc#1189384 +References: bsc#1189392 The variable num_cnt is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be diff --git a/patches.suse/scsi-qla2xxx-Update-version-to-10.02.00.107-k.patch b/patches.suse/scsi-qla2xxx-Update-version-to-10.02.00.107-k.patch index 92aca06..f140a76 100644 --- a/patches.suse/scsi-qla2xxx-Update-version-to-10.02.00.107-k.patch +++ b/patches.suse/scsi-qla2xxx-Update-version-to-10.02.00.107-k.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:06 -0700 Subject: scsi: qla2xxx: Update version to 10.02.00.107-k Patch-mainline: v5.15-rc1 Git-commit: 9798c653547d35cebef59d35edbbc269d85fb1b3 -References: bsc#1189384 +References: bsc#1189392 Link: https://lore.kernel.org/r/20210624052606.21613-12-njavali@marvell.com Signed-off-by: Nilesh Javali diff --git a/patches.suse/scsi-qla2xxx-edif-Add-authentication-pass-fail-bsgs.patch b/patches.suse/scsi-qla2xxx-edif-Add-authentication-pass-fail-bsgs.patch index 135c897..051f880 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-authentication-pass-fail-bsgs.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-authentication-pass-fail-bsgs.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:01 -0700 Subject: scsi: qla2xxx: edif: Add authentication pass + fail bsgs Patch-mainline: v5.15-rc1 Git-commit: 8a4bb2c1dd623b5a71609de5b04ef3b5086b0a3e -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-detection-of-secure-device.patch b/patches.suse/scsi-qla2xxx-edif-Add-detection-of-secure-device.patch index 667d469..b09556e 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-detection-of-secure-device.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-detection-of-secure-device.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:02 -0700 Subject: scsi: qla2xxx: edif: Add detection of secure device Patch-mainline: v5.15-rc1 Git-commit: 9efea843a906c6674ac6728f3f5db2cbfa3e1830 -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). 
This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-doorbell-notification-for-app.patch b/patches.suse/scsi-qla2xxx-edif-Add-doorbell-notification-for-app.patch index aaa4028..1d94617 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-doorbell-notification-for-app.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-doorbell-notification-for-app.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:03 -0700 Subject: scsi: qla2xxx: edif: Add doorbell notification for app Patch-mainline: v5.15-rc1 Git-commit: 7a09e8d92c6d56121910ccb2e8bc0d1affff66ee -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-encryption-to-I-O-path.patch b/patches.suse/scsi-qla2xxx-edif-Add-encryption-to-I-O-path.patch index fb5431c..aa1a877 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-encryption-to-I-O-path.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-encryption-to-I-O-path.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:04 -0700 Subject: scsi: qla2xxx: edif: Add encryption to I/O path Patch-mainline: v5.15-rc1 Git-commit: 44d018577f179383ea2c409f3a392e9dbd1a155e -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-extraction-of-auth_els-from-th.patch b/patches.suse/scsi-qla2xxx-edif-Add-extraction-of-auth_els-from-th.patch index 266c75b..385aec9 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-extraction-of-auth_els-from-th.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-extraction-of-auth_els-from-th.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:25:59 -0700 Subject: scsi: qla2xxx: edif: Add extraction of auth_els from the wire Patch-mainline: v5.15-rc1 Git-commit: fac2807946c10b9a509b9c348afd442fa823c5f7 -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-getfcinfo-and-statistic-bsgs.patch b/patches.suse/scsi-qla2xxx-edif-Add-getfcinfo-and-statistic-bsgs.patch index 01286c2..e1b7d6a 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-getfcinfo-and-statistic-bsgs.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-getfcinfo-and-statistic-bsgs.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:25:57 -0700 Subject: scsi: qla2xxx: edif: Add getfcinfo and statistic bsgs Patch-mainline: v5.15-rc1 Git-commit: 7878f22a2e03b69baf792f74488962981a1c9547 -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-key-update.patch b/patches.suse/scsi-qla2xxx-edif-Add-key-update.patch index ab72050..b250333 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-key-update.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-key-update.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:00 -0700 Subject: scsi: qla2xxx: edif: Add key update Patch-mainline: v5.15-rc1 Git-commit: dd30706e73b70d67e88fdaca688db7a3374fd5de -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). 
This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Add-send-receive-and-accept-for-au.patch b/patches.suse/scsi-qla2xxx-edif-Add-send-receive-and-accept-for-au.patch index 8670032..3074083 100644 --- a/patches.suse/scsi-qla2xxx-edif-Add-send-receive-and-accept-for-au.patch +++ b/patches.suse/scsi-qla2xxx-edif-Add-send-receive-and-accept-for-au.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:25:58 -0700 Subject: scsi: qla2xxx: edif: Add send, receive, and accept for auth_els Patch-mainline: v5.15-rc1 Git-commit: 84318a9f01ce13650ea23eb6362066bb95ccc9fe -References: bsc#1189384 +References: bsc#1189392 Some FC adapters from Marvell offer the ability to encrypt data in flight (EDIF). This feature requires an application to act as an authenticator. diff --git a/patches.suse/scsi-qla2xxx-edif-Increment-command-and-completion-c.patch b/patches.suse/scsi-qla2xxx-edif-Increment-command-and-completion-c.patch index e901b21..8ee1c4e 100644 --- a/patches.suse/scsi-qla2xxx-edif-Increment-command-and-completion-c.patch +++ b/patches.suse/scsi-qla2xxx-edif-Increment-command-and-completion-c.patch @@ -3,7 +3,7 @@ Date: Wed, 23 Jun 2021 22:26:05 -0700 Subject: scsi: qla2xxx: edif: Increment command and completion counts Patch-mainline: v5.15-rc1 Git-commit: 71bef5020cd13e1aaa878d10481aafc1ecd4a8f6 -References: bsc#1189384 +References: bsc#1189392 Increment the command and the completion counts. diff --git a/patches.suse/suse-hv-mana-ndo_select_queue-API.patch b/patches.suse/suse-hv-mana-ndo_select_queue-API.patch new file mode 100644 index 0000000..d7dd9c9 --- /dev/null +++ b/patches.suse/suse-hv-mana-ndo_select_queue-API.patch @@ -0,0 +1,16 @@ +Subject: hv: mana: adjust mana_select_queue to old API +From: Olaf Hering +Patch-mainline: Never, old kernel +References: jsc#SLE-18779, bsc#1185727 + +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -352,7 +352,7 @@ static int mana_get_tx_queue(struct net_ + } + + static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, +- struct net_device *sb_dev) ++ struct net_device *sb_dev, select_queue_fallback_t fallback) + { + int txq; + diff --git a/patches.suse/suse-hv-mana-netdev_lockdep_set_classes.patch b/patches.suse/suse-hv-mana-netdev_lockdep_set_classes.patch new file mode 100644 index 0000000..2c88148 --- /dev/null +++ b/patches.suse/suse-hv-mana-netdev_lockdep_set_classes.patch @@ -0,0 +1,16 @@ +Subject: hv: mana: remove netdev_lockdep_set_classes usage +From: Olaf Hering +Patch-mainline: Never, old kernel +References: jsc#SLE-18779, bsc#1185727 + +--- a/drivers/net/ethernet/microsoft/mana/mana_en.c ++++ b/drivers/net/ethernet/microsoft/mana/mana_en.c +@@ -1787,7 +1787,7 @@ static int mana_probe_port(struct mana_c + if (err) + goto free_net; + +- netdev_lockdep_set_classes(ndev); ++ + + ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + ndev->hw_features |= NETIF_F_RXCSUM; diff --git a/patches.suse/usb-musb-tusb6010-uninitialized-data-in-tusb_fifo_wr.patch b/patches.suse/usb-musb-tusb6010-uninitialized-data-in-tusb_fifo_wr.patch new file mode 100644 index 0000000..77d73b0 --- /dev/null +++ b/patches.suse/usb-musb-tusb6010-uninitialized-data-in-tusb_fifo_wr.patch @@ -0,0 +1,38 @@ +From 517c7bf99bad3d6b9360558414aae634b7472d80 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Thu, 16 Sep 2021 16:57:37 +0300 +Subject: [PATCH] usb: musb: tusb6010: uninitialized data in + tusb_fifo_write_unaligned() 
+Git-commit: 517c7bf99bad3d6b9360558414aae634b7472d80 +References: git-fixes +Patch-mainline: v5.15-rc3 + +This is writing to the first 1 - 3 bytes of "val" and then writing all +four bytes to musb_writel(). The last byte is always going to be +garbage. Zero out the last bytes instead. + +Fixes: 550a7375fe72 ("USB: Add MUSB and TUSB support") +Signed-off-by: Dan Carpenter +Cc: stable +Link: https://lore.kernel.org/r/20210916135737.GI25094@kili +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Oliver Neukum +--- + drivers/usb/musb/tusb6010.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c +index c42937692207..c968ecda42aa 100644 +--- a/drivers/usb/musb/tusb6010.c ++++ b/drivers/usb/musb/tusb6010.c +@@ -190,6 +190,7 @@ tusb_fifo_write_unaligned(void __iomem *fifo, const u8 *buf, u16 len) + } + if (len > 0) { + /* Write the rest 1 - 3 bytes to FIFO */ ++ val = 0; + memcpy(&val, buf, len); + musb_writel(fifo, 0, val); + } +-- +2.26.2 + diff --git a/patches.suse/x86-mm-fix-kern_addr_valid-to-cope-with-existing-but-not-present-entries.patch b/patches.suse/x86-mm-fix-kern_addr_valid-to-cope-with-existing-but-not-present-entries.patch new file mode 100644 index 0000000..89c3392 --- /dev/null +++ b/patches.suse/x86-mm-fix-kern_addr_valid-to-cope-with-existing-but-not-present-entries.patch @@ -0,0 +1,116 @@ +From: Mike Rapoport +Date: Thu, 19 Aug 2021 16:27:17 +0300 +Subject: x86/mm: Fix kern_addr_valid() to cope with existing but not present + entries +Git-commit: 34b1999da935a33be6239226bfa6cd4f704c5c88 +Patch-mainline: v5.15-rc2 +References: bsc#1114648 + +Jiri Olsa reported a fault when running: + + # cat /proc/kallsyms | grep ksys_read + ffffffff8136d580 T ksys_read + # objdump -d --start-address=0xffffffff8136d580 --stop-address=0xffffffff8136d590 /proc/kcore + + /proc/kcore: file format elf64-x86-64 + + Segmentation fault + + general protection fault, probably for non-canonical address 0xf887ffcbff000: 0000 [#1] SMP PTI + CPU: 12 PID: 1079 Comm: objdump Not tainted 5.14.0-rc5qemu+ #508 + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-4.fc34 04/01/2014 + RIP: 0010:kern_addr_valid + Call Trace: + read_kcore + ? rcu_read_lock_sched_held + ? rcu_read_lock_sched_held + ? rcu_read_lock_sched_held + ? trace_hardirqs_on + ? rcu_read_lock_sched_held + ? lock_acquire + ? lock_acquire + ? rcu_read_lock_sched_held + ? lock_acquire + ? rcu_read_lock_sched_held + ? rcu_read_lock_sched_held + ? rcu_read_lock_sched_held + ? lock_release + ? _raw_spin_unlock + ? __handle_mm_fault + ? rcu_read_lock_sched_held + ? lock_acquire + ? rcu_read_lock_sched_held + ? lock_release + proc_reg_read + ? vfs_read + vfs_read + ksys_read + do_syscall_64 + entry_SYSCALL_64_after_hwframe + +The fault happens because kern_addr_valid() dereferences existent but not +present PMD in the high kernel mappings. + +Such PMDs are created when free_kernel_image_pages() frees regions larger +than 2Mb. In this case, a part of the freed memory is mapped with PMDs and +the set_memory_np_noalias() -> ... -> __change_page_attr() sequence will +mark the PMD as not present rather than wipe it completely. + +Have kern_addr_valid() check whether higher level page table entries are +present before trying to dereference them to fix this issue and to avoid +similar issues in the future. 
+ +Stable backporting note: +------------------------ + +Note that the stable marking is for all active stable branches because +there could be cases where pagetable entries exist but are not valid - +see 9a14aefc1d28 ("x86: cpa, fix lookup_address"), for example. So make +sure to be on the safe side here and use pXY_present() accessors rather +than pXY_none() which could #GP when accessing pages in the direct map. + +Also see: + + c40a56a7818c ("x86/mm/init: Remove freed kernel image areas from alias mapping") + +for more info. + +Reported-by: Jiri Olsa +Signed-off-by: Mike Rapoport +Signed-off-by: Borislav Petkov +Reviewed-by: David Hildenbrand +Acked-by: Dave Hansen +Tested-by: Jiri Olsa +Cc: # 4.4+ +Link: https://lkml.kernel.org/r/20210819132717.19358-1-rppt@kernel.org +--- + arch/x86/mm/init_64.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c +index ddeaba947eb3..879886c6cc53 100644 +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -1433,18 +1433,18 @@ int kern_addr_valid(unsigned long addr) + return 0; + + p4d = p4d_offset(pgd, addr); +- if (p4d_none(*p4d)) ++ if (!p4d_present(*p4d)) + return 0; + + pud = pud_offset(p4d, addr); +- if (pud_none(*pud)) ++ if (!pud_present(*pud)) + return 0; + + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); +- if (pmd_none(*pmd)) ++ if (!pmd_present(*pmd)) + return 0; + + if (pmd_large(*pmd)) + diff --git a/series.conf b/series.conf index 4e0a19f..175bd24 100644 --- a/series.conf +++ b/series.conf @@ -49643,6 +49643,7 @@ patches.suse/RDMA-efa-Add-Amazon-EFA-driver.patch patches.suse/ipoib-Do-not-overreact-to-SM-LID-change-even.patch patches.suse/RDMA-ipoib-Allow-user-space-differentiate-between-va.patch + patches.suse/SUNRPC-Ensure-to-ratelimit-the-server-not-responding.patch patches.suse/NFS-Don-t-interrupt-file-writeout-due-to-fatal-error.patch patches.suse/NFS-Don-t-call-generic_error_remove_page-while-holdi.patch patches.suse/NFS-make-nfs_match_client-killable.patch @@ -50981,6 +50982,7 @@ patches.suse/KVM-PPC-Book3S-HV-Fix-CR0-setting-in-TM-emulation.patch patches.suse/powerpc-mm-mark-more-tlb-functions-as-__always_inlin.patch patches.suse/powerpc-pseries-Generalize-hcall_vphn.patch + patches.suse/powerpc-pseries-Move-mm-book3s64-vphn.c-under-platfo.patch patches.suse/powerpc-mm-hash-4k-Don-t-use-64K-page-size-for-vmemm.patch patches.suse/powerpc-mm-radix-Use-the-right-page-size-for-vmemmap.patch patches.suse/powerpc-mm-drconf-Use-NUMA_NO_NODE-on-failures-inste.patch @@ -55327,6 +55329,7 @@ patches.suse/random-always-use-batched-entropy-for-get_random_u-3.patch patches.suse/powerpc-drmem-avoid-NULL-pointer-dereference-when-dr.patch patches.suse/powerpc-smp-Use-nid-as-fallback-for-package_id.patch + patches.suse/powerpc-numa-Early-request-for-home-node-associativi.patch patches.suse/powerpc-numa-Remove-late-request-for-home-node-assoc.patch patches.suse/cpufreq-powernv-Fix-use-after-free.patch patches.suse/cpufreq-powernv-Fix-unsafe-notifiers.patch @@ -55571,6 +55574,7 @@ patches.suse/0001-drm-edid-Fix-off-by-one-in-DispID-DTD-pixel-clock.patch patches.suse/drm-qxl-qxl_release-leak-in-qxl_draw_dirty_fb.patch patches.suse/drm-qxl-qxl_release-leak-in-qxl_hw_surface_alloc.patch + patches.suse/drm-qxl-lost-qxl_bo_kunmap_atomic_page-in-qxl_image_.patch patches.suse/drm-qxl-qxl_release-use-after-free.patch patches.suse/ALSA-hda-Match-both-PCI-ID-and-SSID-for-driver-black.patch 
patches.suse/ALSA-pcm-oss-Place-the-plugin-buffer-overflow-checks.patch @@ -56805,6 +56809,7 @@ patches.suse/pseries-Fix-64-bit-logical-memory-block-panic.patch patches.suse/powerpc-vdso-Fix-vdso-cpu-truncation.patch patches.suse/powerpc-prom-Enable-Radix-GTSE-in-cpu-pa-features.patch + patches.suse/powerpc-mm-radix-Free-PUD-table-when-freeing-pagetab.patch patches.suse/powerpc-book3s64-pkeys-Use-PVR-check-instead-of-cpu-.patch patches.suse/powerpc-perf-Fix-missing-is_sier_aviable-during-buil.patch patches.suse/powerpc-pseries-PCIE-PHB-reset.patch @@ -58248,6 +58253,8 @@ patches.suse/module-delay-kobject-uevent-until-after-module-init-.patch patches.suse/powerpc-64-Set-up-a-kernel-stack-for-secondaries-bef.patch patches.suse/Revert-powerpc-pseries-hotplug-cpu-Remove-double-fre.patch + patches.suse/powerpc-perf-Use-the-address-from-SIAR-register-to-s.patch + patches.suse/powerpc-perf-Use-regs-nip-when-SIAR-is-zero.patch patches.suse/powerpc-perf-Fix-crash-with-is_sier_available-when-p.patch patches.suse/powerpc-pci-Remove-LSI-mappings-on-device-teardown.patch patches.suse/powerpc-xmon-Change-printk-to-pr_cont.patch @@ -58485,6 +58492,11 @@ patches.suse/vmxnet3-Remove-buf_info-from-device-accessible-struc.patch patches.suse/ibmvnic-rework-to-ensure-SCRQ-entry-reads-are-proper.patch patches.suse/ibmvnic-remove-unnecessary-rmb-inside-ibmvnic_poll.patch + patches.suse/ath-Use-safer-key-clearing-with-key-cache-entries.patch + patches.suse/ath9k-Clear-key-cache-explicitly-on-disabling-hardwa.patch + patches.suse/ath-Export-ath_hw_keysetmac.patch + patches.suse/ath-Modify-ath_key_delete-to-not-need-full-key-entry.patch + patches.suse/ath9k-Postpone-key-cache-entry-deletion-for-TXQ-fram.patch patches.suse/wlcore-Fix-command-execute-failure-19-for-wl12xx.patch patches.suse/net-bridge-use-switchdev-for-port-flags-set-through-.patch patches.suse/ibmvnic-prefer-unsigned-long-over-unsigned-long-int.patch @@ -58573,6 +58585,7 @@ patches.suse/ima-Free-IMA-measurement-buffer-after-kexec-syscall patches.suse/tpm_tis-Fix-check_locality-for-correct-locality-acqu.patch patches.suse/tpm_tis-clean-up-locality-release.patch + patches.suse/crypto-x86-aes-ni-xts-use-direct-calls-to-and-4-way-stride.patch patches.suse/hwrng-timeriomem-Fix-cooldown-period-calculation.patch patches.suse/mmc-usdhi6rol0-Fix-a-resource-leak-in-the-error-hand.patch patches.suse/mmc-sdhci-esdhc-imx-fix-kernel-panic-when-remove-mod.patch @@ -59134,6 +59147,7 @@ patches.suse/Bluetooth-Set-CONF_NOT_COMPLETE-as-l2cap_chan-defaul.patch patches.suse/Bluetooth-verify-AMP-hci_chan-before-amp_destroy.patch patches.suse/Bluetooth-initialize-skb_queue_head-at-l2cap_chan_cr.patch + patches.suse/Bluetooth-check-for-zapped-sk-before-connecting.patch patches.suse/cxgb4-Fix-unintentional-sign-extension-issues.patch patches.suse/net-thunderx-Fix-unintentional-sign-extension-issue.patch patches.suse/ibmvnic-clean-up-the-remaining-debugfs-data-structur.patch @@ -59144,13 +59158,18 @@ patches.suse/ibmvnic-queue-reset-work-in-system_long_wq.patch patches.suse/rtlwifi-8821ae-upgrade-PHY-and-RF-parameters.patch patches.suse/ipw2x00-potential-buffer-overflow-in-libipw_wx_set_e.patch + patches.suse/msft-hv-2332-net-mana-Add-a-driver-for-Microsoft-Azure-Network-Ad.patch patches.suse/mac80211-clear-the-beacon-s-CRC-after-channel-switch.patch patches.suse/mac80211-bail-out-if-cipher-schemes-are-invalid.patch patches.suse/cfg80211-scan-drop-entry-from-hidden_list-on-overflo.patch + patches.suse/msft-hv-2337-net-mana-remove-redundant-initialization-of-variable.patch + 
patches.suse/msft-hv-2340-net-mana-fix-PCI_HYPERV-dependency.patch + patches.suse/msft-hv-2341-net-mana-Use-int-to-check-the-return-value-of-mana_g.patch patches.suse/ath9k-Fix-error-check-in-ath9k_hw_read_revisions-for.patch patches.suse/wl3501_cs-Fix-out-of-bounds-warnings-in-wl3501_send_.patch patches.suse/wl3501_cs-Fix-out-of-bounds-warnings-in-wl3501_mgmt_.patch patches.suse/net-xdp-Update-pkt_type-if-generic-XDP-changes-unica.patch + patches.suse/msft-hv-2342-hv_netvsc-Make-netvsc-VF-binding-check-both-MAC-and-.patch patches.suse/vsock-vmci-log-once-the-failed-queue-pair-allocation.patch patches.suse/net-phy-intel-xway-enable-integrated-led-functions.patch patches.suse/bnxt_en-fix-ternary-sign-extension-bug-in-bnxt_show_.patch @@ -59414,6 +59433,7 @@ patches.suse/be2net-Fix-an-error-handling-path-in-be_probe.patch patches.suse/cxgb4-fix-wrong-shift.patch patches.suse/s390-sles12sp5-zcrypt-fix-hanging-ioctl-caused-by-wrong-msg-counter.patch + patches.suse/powerpc-perf-Fix-crash-in-perf_instruction_pointer-w.patch patches.suse/x86-fpu-Reset-state-for-all-signal-restore-failures.patch patches.suse/Revert-PCI-PM-Do-not-read-power-state-in-pci_enable_.patch patches.suse/drm-nouveau-wait-for-moving-fence-after-pinning-v2.patch @@ -59493,6 +59513,7 @@ patches.suse/nvme-verify-MNAN-value-if-ANA-is-enabled.patch patches.suse/net-pch_gbe-Propagate-error-from-devm_gpio_request_o.patch patches.suse/mISDN-fix-possible-use-after-free-in-HFC_cleanup.patch + patches.suse/msft-hv-2344-net-mana-Use-struct_size-in-kzalloc.patch patches.suse/ibmvnic-remove-default-label-from-to_string-switch.patch patches.suse/e100-handle-eeprom-as-little-endian.patch patches.suse/can-hi311x-hi3110_can_probe-silence-clang-warning.patch @@ -59540,6 +59561,7 @@ patches.suse/gve-DQO-Fix-off-by-one-in-gve_rx_dqo.patch patches.suse/can-bcm-delay-release-of-struct-bcm_op-after-synchro.patch patches.suse/can-gw-synchronize-rcu-operations-before-removing-gw.patch + patches.suse/net-mana-Fix-a-memory-leak-in-an-error-handling-path.patch patches.suse/xfrm-xfrm_state_mtu-should-return-at-least-1280-for-.patch patches.suse/Revert-ibmvnic-simplify-reset_long_term_buff-functio.patch patches.suse/Revert-ibmvnic-remove-duplicate-napi_schedule-call-i.patch @@ -59567,7 +59589,9 @@ patches.suse/lib-decompress_unlz4.c-correctly-handle-zero-padding.patch patches.suse/lib-decompressors-remove-set-but-not-used-variabled-.patch patches.suse/mwifiex-re-fix-for-unaligned-accesses.patch + patches.suse/powerpc-powernv-Fix-machine-check-reporting-of-async.patch patches.suse/powerpc-papr_scm-Properly-handle-UUID-types-and-API.patch + patches.suse/powerpc-pseries-dlpar-use-rtas_get_sensor.patch patches.suse/iommu-vt-d-define-counter-explicitly-as-unsigned-int patches.suse/iommu-amd-fix-extended-features-logging patches.suse/scsi-ibmvfc-Handle-move-login-failure.patch @@ -59773,6 +59797,7 @@ patches.suse/PCI-MSI-Do-not-set-invalid-bits-in-MSI-mask.patch patches.suse/PCI-MSI-Correct-misleading-comments.patch patches.suse/PCI-MSI-Use-msi_mask_irq-in-pci_msi_shutdown.patch + patches.suse/btrfs-prevent-rename2-from-exchanging-a-subvol-with-a-directory-from-different-parents.patch patches.suse/bnxt-disable-napi-before-canceling-DIM.patch patches.suse/bnxt_en-Add-missing-DMA-memory-barriers.patch patches.suse/net-qlcnic-add-missed-unlock-in-qlcnic_83xx_flash_re.patch @@ -59809,6 +59834,9 @@ patches.suse/Bluetooth-Move-shutdown-callback-before-flushing-tx-.patch patches.suse/Bluetooth-add-timeout-sanity-check-to-hci_inquiry.patch 
patches.suse/mac80211-Fix-insufficient-headroom-issue-for-AMSDU.patch + patches.suse/msft-hv-2430-net-mana-Move-NAPI-from-EQ-to-CQ.patch + patches.suse/msft-hv-2431-net-mana-Add-support-for-EQ-sharing.patch + patches.suse/msft-hv-2432-net-mana-Add-WARN_ON_ONCE-in-case-of-CQE-read-overfl.patch patches.suse/ath6kl-wmi-fix-an-error-code-in-ath6kl_wmi_sync_poin.patch patches.suse/bcma-Fix-memory-leak-for-internally-handled-cores.patch patches.suse/net-sched-Fix-qdisc_rate_table-refcount-leak-when-ge.patch @@ -59894,13 +59922,23 @@ patches.suse/mm-vmscan-guarantee-drop_slab_node-termination.patch patches.suse/pseries-drmem-update-LMBs-after-LPM.patch patches.suse/powerpc-pseries-Prevent-free-CPU-ids-being-reused-on.patch + patches.suse/powerpc-perf-hv-gpci-Fix-counter-value-parsing.patch + patches.suse/powerpc-perf-Use-stack-siar-instead-of-mfspr.patch + patches.suse/powerpc-perf-Drop-the-case-of-returning-0-as-instruc.patch + patches.suse/powerpc-perf-Fix-the-check-for-SIAR-value.patch patches.suse/SUNRPC-Simplify-socket-shutdown-when-not-reusing-TCP.patch + patches.suse/fuse-truncate-pagecache-on-atomic_o_trunc.patch patches.suse/qlcnic-Remove-redundant-unlock-in-qlcnic_pinit_from_.patch patches.suse/profiling-fix-shift-out-of-bounds-bugs.patch patches.suse/SUNRPC-improve-error-response-to-over-size-gss-crede.patch patches.suse/s390-unwind-use-current_frame_address-to-unwind-current-task.patch patches.suse/time-Handle-negative-seconds-correctly-in-timespec64.patch + patches.suse/ipc-remove-memcg-accounting-for-sops-objects-in-do_semtimedop.patch patches.suse/ibmvnic-check-failover_pending-in-login-response.patch + patches.suse/msft-hv-2437-net-mana-Prefer-struct_size-over-open-coded-arithmet.patch + patches.suse/x86-mm-fix-kern_addr_valid-to-cope-with-existing-but-not-present-entries.patch + patches.suse/usb-musb-tusb6010-uninitialized-data-in-tusb_fifo_wr.patch + patches.suse/USB-serial-option-remove-duplicate-USB-device-ID.patch patches.suse/scsi-lpfc-Fix-CPU-to-from-endian-warnings-introduced.patch patches.suse/scsi-lpfc-Fix-compilation-errors-on-kernels-with-no-.patch patches.suse/scsi-lpfc-Remove-unneeded-variable.patch @@ -59952,7 +59990,6 @@ patches.suse/blk-mq-move-_blk_mq_update_nr_hw_queues-synchronize_rcu-call patches.suse/proc-Avoid-mixing-integer-types-in-mem_rw.patch patches.suse/scsi-smartpqi-create-module-parameters-for-LUN-reset.patch - patches.suse/ipc-remove-memcg-accounting-for-sops-objects.patch patches.suse/crypto_ccp-fix_resource_leaks_in_ccp_run_aes_gcm_cmd.patch ######################################################## @@ -60220,6 +60257,8 @@ ######################################################## # Suse specific stuff ######################################################## + patches.suse/suse-hv-mana-netdev_lockdep_set_classes.patch + patches.suse/suse-hv-mana-ndo_select_queue-API.patch patches.suse/suse-hv-netvsc-bpf-api.patch patches.suse/suse-hv-hv-iommu-api.patch patches.suse/suse-hv-guest-os-id.patch @@ -60867,6 +60906,7 @@ patches.kabi/kabi-fix-after-kvm-vcpu-id-array-fix.patch patches.kabi/kabi-mask-changes-to-vhost_dev_init-and-struct-vhost.patch patches.kabi/kabi-fix-bpf_insn_aux_data-revert-sanitize_stack_spill.patch + patches.kabi/ath_key_delete-kABI-fix.patch ######################################################## # You'd better have a good reason for adding a patch diff --git a/supported.conf b/supported.conf index 0b181ea..c26bc96 100644 --- a/supported.conf +++ b/supported.conf @@ -1595,6 +1595,7 @@ - drivers/net/ethernet/micrel/ks8842 - 
drivers/net/ethernet/micrel/ks8851_mll - drivers/net/ethernet/micrel/ksz884x ++external +base drivers/net/ethernet/microsoft/mana/mana drivers/net/ethernet/myricom/myri10ge/myri10ge drivers/net/ethernet/natsemi/natsemi # National Semiconductor DP8381x series PCI Ethernet driver drivers/net/ethernet/natsemi/ns83820 # National Semiconductor DP83820 10/100/1000 driver
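
The usb: musb: tusb6010 fix above zeroes `val` before the partial memcpy() so that the one to three trailing bytes pushed to the FIFO by musb_writel() are defined rather than leftover stack contents. The same pattern, reduced to a self-contained userspace sketch (pack_tail() and the printed format are illustrative only, not taken from the driver):

#include <stdio.h>
#include <inttypes.h>
#include <string.h>

/*
 * Mimics the tail of an unaligned FIFO write, where only 1-3 payload
 * bytes remain but a full 32-bit word is written out.
 */
static uint32_t pack_tail(const uint8_t *buf, size_t len)
{
	uint32_t val = 0;	/* zero first, as the fix does, so the bytes
				 * beyond 'len' are not stack garbage */
	memcpy(&val, buf, len);	/* only 'len' bytes are copied */
	return val;
}

int main(void)
{
	const uint8_t tail[3] = { 0xaa, 0xbb, 0xcc };

	/* All four bytes of the packed word are now well defined. */
	printf("0x%08" PRIx32 "\n", pack_tail(tail, sizeof(tail)));
	return 0;
}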
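
The x86/mm kern_addr_valid() fix above hinges on the distinction between a page-table entry that is absent (none) and one that still exists but has had its present bit cleared, as set_memory_np_noalias() does for freed kernel-image regions. A minimal userspace sketch of that distinction, using a made-up MOCK_PRESENT bit and stand-in helpers rather than the real x86 pmd_none()/pmd_present() accessors:

#include <stdio.h>
#include <stdint.h>

#define MOCK_PRESENT 0x1ULL  /* stand-in for the hardware "present" bit */

/* Stand-ins for pmd_none()/pmd_present(); not the real x86 helpers. */
static int entry_none(uint64_t e)    { return e == 0; }
static int entry_present(uint64_t e) { return (e & MOCK_PRESENT) != 0; }

int main(void)
{
	/*
	 * An entry as left behind after clearing the present bit: the
	 * PFN bits are still set, so the entry is non-zero.
	 */
	uint64_t pmd = 0x2000ULL;

	/* none-style check: entry is "not none", so it would be dereferenced. */
	printf("none-style check rejects it:    %s\n",
	       entry_none(pmd) ? "yes" : "no");
	/* present-style check: entry is "not present", so the walk bails out. */
	printf("present-style check rejects it: %s\n",
	       entry_present(pmd) ? "no" : "yes");
	return 0;
}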