From 4c5c94400e8045cba1a615ddbda63f84176b0d13 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Mar 28 2024 14:28:04 +0000 Subject: scsi: qla2xxx: Fix command flush on cable pull (bsc#1221816). --- diff --git a/patches.suse/scsi-qla2xxx-Fix-command-flush-on-cable-pull.patch b/patches.suse/scsi-qla2xxx-Fix-command-flush-on-cable-pull.patch new file mode 100644 index 0000000..4604ce9 --- /dev/null +++ b/patches.suse/scsi-qla2xxx-Fix-command-flush-on-cable-pull.patch @@ -0,0 +1,93 @@ +From: Quinn Tran +Date: Tue, 27 Feb 2024 22:11:22 +0530 +Subject: scsi: qla2xxx: Fix command flush on cable pull +Patch-mainline: Queued in subsystem maintainer repository +Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git +Git-commit: a27d4d0e7de305def8a5098a614053be208d1aa1 +References: bsc#1221816 + +System crash due to command failed to flush back to SCSI layer. + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 + PGD 0 P4D 0 + Oops: 0000 [#1] SMP NOPTI + CPU: 27 PID: 793455 Comm: kworker/u130:6 Kdump: loaded Tainted: G OE --------- - - 4.18.0-372.9.1.el8.x86_64 #1 + Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 + Workqueue: nvme-wq nvme_fc_connect_ctrl_work [nvme_fc] + RIP: 0010:__wake_up_common+0x4c/0x190 + Code: 24 10 4d 85 c9 74 0a 41 f6 01 04 0f 85 9d 00 00 00 48 8b 43 08 48 83 c3 08 4c 8d 48 e8 49 8d 41 18 48 39 c3 0f 84 f0 00 00 00 <49> 8b 41 18 89 54 24 08 31 ed 4c 8d 70 e8 45 8b 29 41 f6 c5 04 75 + RSP: 0018:ffff95f3e0cb7cd0 EFLAGS: 00010086 + RAX: 0000000000000000 RBX: ffff8b08d3b26328 RCX: 0000000000000000 + RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8b08d3b26320 + RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffffffffe8 + R10: 0000000000000000 R11: ffff95f3e0cb7a60 R12: ffff95f3e0cb7d20 + R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff8b2fdf6c0000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 
0000000080050033 + CR2: 0000000000000000 CR3: 0000002f1e410002 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + __wake_up_common_lock+0x7c/0xc0 + qla_nvme_ls_req+0x355/0x4c0 [qla2xxx] + qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae1407ca000 from port 21:32:00:02:ac:07:ee:b8 loop_id 0x02 s_id 01:02:00 logout 1 keep 0 els_logo 0 + ? __nvme_fc_send_ls_req+0x260/0x380 [nvme_fc] + qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:00:02:ac:07:ee:b8 state transitioned from ONLINE to LOST - portid=010200. + ? nvme_fc_send_ls_req.constprop.42+0x1a/0x45 [nvme_fc] + qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320002ac07eeb8. rport ffff8ae598122000 roles 1 + ? nvme_fc_connect_ctrl_work.cold.63+0x1e3/0xa7d [nvme_fc] + qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae14801e000 from port 21:32:01:02:ad:f7:ee:b8 loop_id 0x04 s_id 01:02:01 logout 1 keep 0 els_logo 0 + ? __switch_to+0x10c/0x450 + ? process_one_work+0x1a7/0x360 + qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:01:02:ad:f7:ee:b8 state transitioned from ONLINE to LOST - portid=010201. + ? worker_thread+0x1ce/0x390 + ? create_worker+0x1a0/0x1a0 + qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320102adf7eeb8. rport ffff8ae3b2312800 roles 70 + ? kthread+0x10a/0x120 + qla2xxx [0000:12:00.1]-2112:3: qla_nvme_unregister_remote_port: unregister remoteport on ffff8ae14801e000 21320102adf7eeb8 + ? set_kthread_struct+0x40/0x40 + qla2xxx [0000:12:00.1]-2110:3: remoteport_delete of ffff8ae14801e000 21320102adf7eeb8 completed. + ? ret_from_fork+0x1f/0x40 + qla2xxx [0000:12:00.1]-f086:3: qlt_free_session_done: waiting for sess ffff8ae14801e000 logout + +The system was under memory stress where driver was not able to allocate an +SRB to carry out error recovery of cable pull. 
The failure to flush causes +the upper layer to start modifying scsi_cmnd. When the system frees up some +memory, the subsequent cable pull triggers another command flush. At this +point the driver accesses a null pointer when attempting to DMA unmap the +SGL. + +Add a check to make sure commands are flushed back on session tear down to +prevent the null pointer access. + +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240227164127.36465-7-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Acked-by: Daniel Wagner +--- + drivers/scsi/qla2xxx/qla_target.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/scsi/qla2xxx/qla_target.c ++++ b/drivers/scsi/qla2xxx/qla_target.c +@@ -1062,6 +1062,16 @@ void qlt_free_session_done(struct work_s + "%s: sess %p logout completed\n", __func__, sess); + } + ++ /* check for any straggling io left behind */ ++ if (!(sess->flags & FCF_FCP2_DEVICE) && ++ qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { ++ ql_log(ql_log_warn, vha, 0x3027, ++ "IO not return. Resetting.\n"); ++ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); ++ qla2xxx_wake_dpc(vha); ++ qla2x00_wait_for_chip_reset(vha); ++ } ++ + if (sess->logo_ack_needed) { + sess->logo_ack_needed = 0; + qla24xx_async_notify_ack(vha, sess, diff --git a/series.conf b/series.conf index 6725db0..85e2866 100644 --- a/series.conf +++ b/series.conf @@ -20060,6 +20060,7 @@ patches.suse/scsi-qla2xxx-Split-FCE-EFT-trace-control.patch patches.suse/scsi-qla2xxx-Update-manufacturer-detail.patch patches.suse/scsi-qla2xxx-NVME-FCP-prefer-flag-not-being-honored.patch + patches.suse/scsi-qla2xxx-Fix-command-flush-on-cable-pull.patch ######################################################## # end of sorted patches